tgt_isa=x86_64
tgt_os=darwin13
;;
+ *darwin14*)
+ tgt_isa=x86_64
+ tgt_os=darwin14
+ ;;
x86_64*mingw32*)
tgt_os=win64
;;
add_cflags "-mmacosx-version-min=10.9"
add_ldflags "-mmacosx-version-min=10.9"
;;
+ *-darwin14-*)
+ add_cflags "-mmacosx-version-min=10.10"
+ add_ldflags "-mmacosx-version-min=10.10"
+ ;;
*-iphonesimulator-*)
add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
add_ldflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
fi
tgt_os_no_version=$(echo "${tgt_os}" | tr -d "[0-9]")
+ if [ "${tgt_os_no_version}" = "darwin" ] || \
+ [ "${tgt_os_no_version}" = "openbsd" ] || [ "`uname`" = "OpenBSD" ]; then
+ openbsd_like=yes
+ fi
# Default use_x86inc to yes when we are 64 bit, non-pic, or on any
# target that is neither Darwin nor OpenBSD.
if [ "${tgt_isa}" = "x86_64" ] || [ "${pic}" != "yes" ] || \
- [ "${tgt_os_no_version}" != "darwin" ]; then
+ [ "${openbsd_like}" != "yes" ]; then
soft_enable use_x86inc
fi
${toggle_postproc_visualizer} macro block / block level visualizers
${toggle_multi_res_encoding} enable multiple-resolution encoding
${toggle_temporal_denoising} enable temporal denoising and disable the spatial denoiser
+ ${toggle_vp9_highbitdepth} enable 10/12 bit support in VP9
${toggle_vp9_temporal_denoising}
enable vp9 temporal denoising
${toggle_webm_io} enable input from and output to WebM container
all_platforms="${all_platforms} x86-darwin11-gcc"
all_platforms="${all_platforms} x86-darwin12-gcc"
all_platforms="${all_platforms} x86-darwin13-gcc"
+all_platforms="${all_platforms} x86-darwin14-gcc"
all_platforms="${all_platforms} x86-iphonesimulator-gcc"
all_platforms="${all_platforms} x86-linux-gcc"
all_platforms="${all_platforms} x86-linux-icc"
all_platforms="${all_platforms} x86_64-darwin11-gcc"
all_platforms="${all_platforms} x86_64-darwin12-gcc"
all_platforms="${all_platforms} x86_64-darwin13-gcc"
+all_platforms="${all_platforms} x86_64-darwin14-gcc"
all_platforms="${all_platforms} x86_64-iphonesimulator-gcc"
all_platforms="${all_platforms} x86_64-linux-gcc"
all_platforms="${all_platforms} x86_64-linux-icc"
all_platforms="${all_platforms} universal-darwin11-gcc"
all_platforms="${all_platforms} universal-darwin12-gcc"
all_platforms="${all_platforms} universal-darwin13-gcc"
+all_platforms="${all_platforms} universal-darwin14-gcc"
all_platforms="${all_platforms} generic-gnu"
# all_targets is a list of all targets that can be configured
# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
# by quotes) that should identify the project.
-PROJECT_NAME = "WebM VP8 Codec SDK"
+PROJECT_NAME = "WebM Codec SDK"
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.
SHOW_USED_FILES = YES
-# If the sources in your project are distributed over multiple directories
-# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
-# in the documentation. The default is NO.
-
-SHOW_DIRECTORIES = NO
-
# The FILE_VERSION_FILTER tag can be used to specify a program or script that
# doxygen should invoke to get the current version for each file (typically from the
# version control system). Doxygen will invoke the program by executing (via
HTML_STYLESHEET =
-# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
-# files or namespaces will be aligned in HTML using tables. If set to
-# NO a bullet list will be used.
-
-HTML_ALIGN_MEMBERS = YES
-
# If the GENERATE_HTMLHELP tag is set to YES, additional index files
# will be generated that can be used as input for tools like the
# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
-/*!\mainpage WebM VP8 Codec SDK
+/*!\mainpage WebM Codec SDK
\section main_contents Page Contents
- \ref main_intro
- \ref main_support
\section main_intro Introduction
- Welcome to the WebM VP8 Codec SDK. This SDK allows you to integrate your
- applications with the VP8 video codec, a high quality, royalty free, open
- source codec deployed on millions of computers and devices worldwide.
+ Welcome to the WebM Codec SDK. This SDK allows you to integrate your
+ applications with the VP8 and VP9 video codecs, high quality, royalty free,
+ open source codecs deployed on billions of computers and devices worldwide.
- This distribution of the WebM VP8 Codec SDK includes the following support:
+ This distribution of the WebM Codec SDK includes the following support:
\if vp8_encoder
- \ref vp8_encoder
- Read the \ref samples "sample code" for examples of how to interact with the
codec.
- \ref codec reference
- \if encoder
- - \ref encoder reference
- \endif
- \if decoder
- - \ref decoder reference
- \endif
+ \if encoder
+ - \ref encoder reference
+ \endif
+ \if decoder
+ - \ref decoder reference
+ \endif
\section main_support Support Options & FAQ
The WebM project is an open source project supported by its community. For
first_drop_ = 0;
bits_total_ = 0;
duration_ = 0.0;
+ denoiser_offon_test_ = 0;
+ denoiser_offon_period_ = -1;
}
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
if (video->frame() == 1) {
encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
}
+
+ if (denoiser_offon_test_) {
+ ASSERT_GT(denoiser_offon_period_, 0)
+ << "denoiser_offon_period_ is not positive.";
+ if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
+ // Flip denoiser_on_ periodically
+ denoiser_on_ ^= 1;
+ }
+ encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
+ }
+
const vpx_rational_t tb = video->timebase();
timebase_ = static_cast<double>(tb.num) / tb.den;
duration_ = 0;
double effective_datarate_;
size_t bits_in_last_frame_;
int denoiser_on_;
+ int denoiser_offon_test_;
+ int denoiser_offon_period_;
};
#if CONFIG_TEMPORAL_DENOISING
<< " The datarate for the file missed the target!";
}
}
+
+// Check basic datarate targeting, for a single bitrate, while the denoiser is
+// switched off and on during the encode. With denoiser_offon_test_ set,
+// PreEncodeFrameHook flips denoiser_on_ whenever
+// (frame + 1) % denoiser_offon_period_ == 0 and re-applies it through
+// VP8E_SET_NOISE_SENSITIVITY.
+TEST_P(DatarateTestLarge, DenoiserOffOn) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_dropframe_thresh = 1;
+ cfg_.rc_max_quantizer = 56;
+ cfg_.rc_end_usage = VPX_CBR;
+ ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ 30, 1, 0, 299);
+ cfg_.rc_target_bitrate = 300;
+ ResetModel();
+ // NOTE(review): the off/on fields are set after ResetModel(); it looks like
+ // ResetModel() clears them (0 / -1) - confirm before reordering.
+ // The denoiser is off by default.
+ denoiser_on_ = 0;
+ // Set the offon test flag.
+ denoiser_offon_test_ = 1;
+ // Flip the denoiser state every 100 frames.
+ denoiser_offon_period_ = 100;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ // Overshoot check: the target must be at least 95% of the effective rate.
+ ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
+ << " The datarate for the file exceeds the target!";
+ // Undershoot check: the target must not exceed 130% of the file rate.
+ ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3)
+ << " The datarate for the file missed the target!";
+}
#endif // CONFIG_TEMPORAL_DENOISING
TEST_P(DatarateTestLarge, BasicBufferModel) {
// Check basic datarate targeting, for a single bitrate, when denoiser is off
// and on.
-TEST_P(DatarateTestVP9Large, DenoiserOffon) {
+TEST_P(DatarateTestVP9Large, DenoiserOffOn) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
cfg_.rc_buf_sz = 1000;
static bool compare_img(const vpx_image_t *img1,
const vpx_image_t *img2) {
bool match = (img1->fmt == img2->fmt) &&
+ (img1->cs == img2->cs) &&
(img1->d_w == img2->d_w) &&
(img1->d_h == img2->d_h);
void Reset() {
error_nframes_ = 0;
droppable_nframes_ = 0;
+ pattern_switch_ = 0;
}
virtual void SetUp() {
// 1 3
// 0 2 .....
// LAST is updated on base/layer 0, GOLDEN updated on layer 1.
- int SetFrameFlags(int frame_num, int num_temp_layers) {
+ // Non-zero pattern_switch parameter means pattern will switch to
+ // not using LAST for frame_num >= pattern_switch.
+ int SetFrameFlags(int frame_num,
+ int num_temp_layers,
+ int pattern_switch) {
int frame_flags = 0;
if (num_temp_layers == 2) {
- if (frame_num % 2 == 0) {
- // Layer 0: predict from L and ARF, update L.
- frame_flags = VP8_EFLAG_NO_REF_GF |
- VP8_EFLAG_NO_UPD_GF |
- VP8_EFLAG_NO_UPD_ARF;
- } else {
- // Layer 1: predict from L, GF, and ARF, and update GF.
- frame_flags = VP8_EFLAG_NO_UPD_ARF |
- VP8_EFLAG_NO_UPD_LAST;
- }
+ if (frame_num % 2 == 0) {
+ if (frame_num < pattern_switch || pattern_switch == 0) {
+ // Layer 0: predict from LAST and ARF, update LAST.
+ frame_flags = VP8_EFLAG_NO_REF_GF |
+ VP8_EFLAG_NO_UPD_GF |
+ VP8_EFLAG_NO_UPD_ARF;
+ } else {
+ // Layer 0: predict from GF and ARF, update GF.
+ frame_flags = VP8_EFLAG_NO_REF_LAST |
+ VP8_EFLAG_NO_UPD_LAST |
+ VP8_EFLAG_NO_UPD_ARF;
+ }
+ } else {
+ if (frame_num < pattern_switch || pattern_switch == 0) {
+ // Layer 1: predict from L, GF, and ARF, update GF.
+ frame_flags = VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_UPD_LAST;
+ } else {
+ // Layer 1: predict from GF and ARF, update GF.
+ frame_flags = VP8_EFLAG_NO_REF_LAST |
+ VP8_EFLAG_NO_UPD_LAST |
+ VP8_EFLAG_NO_UPD_ARF;
+ }
+ }
}
return frame_flags;
}
VP8_EFLAG_NO_UPD_ARF);
// For temporal layer case.
if (cfg_.ts_number_layers > 1) {
- frame_flags_ = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
+ frame_flags_ = SetFrameFlags(video->frame(),
+ cfg_.ts_number_layers,
+ pattern_switch_);
for (unsigned int i = 0; i < droppable_nframes_; ++i) {
if (droppable_frames_[i] == video->frame()) {
std::cout << "Encoding droppable frame: "
return mismatch_nframes_;
}
+ // Stores the frame number that is later passed to SetFrameFlags() as
+ // pattern_switch: frames with frame_num >= that value use the pattern that
+ // does not reference LAST. A value of 0 keeps the original pattern for all
+ // frames. The value is kept in an unsigned int member, so callers should
+ // pass a non-negative frame number.
+ void SetPatternSwitch(int frame_switch) {
+ pattern_switch_ = frame_switch;
+ }
+
private:
double psnr_;
unsigned int nframes_;
unsigned int error_nframes_;
unsigned int droppable_nframes_;
+ unsigned int pattern_switch_;
double mismatch_psnr_;
unsigned int mismatch_nframes_;
unsigned int error_frames_[kMaxErrorFrames];
// Error resilient mode ON.
cfg_.g_error_resilient = 1;
cfg_.kf_mode = VPX_KF_DISABLED;
+ SetPatternSwitch(0);
// The odd frames are the enhancement layer for 2 layer pattern, so set
// those frames as droppable. Drop the last 7 frames.
Reset();
}
+// Check for successful decoding and no encoder/decoder mismatch
+// for a two layer temporal pattern, where at some point in the
+// sequence, the LAST ref is not used anymore.
+TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) {
+ // 33333333 / 1000000000, i.e. roughly 1/30.
+ const vpx_rational timebase = { 33333333, 1000000000 };
+ cfg_.g_timebase = timebase;
+ cfg_.rc_target_bitrate = 500;
+ cfg_.g_lag_in_frames = 0;
+
+ cfg_.rc_end_usage = VPX_CBR;
+ // 2 Temporal layers, no spatial layers, CBR mode.
+ cfg_.ss_number_layers = 1;
+ cfg_.ts_number_layers = 2;
+ cfg_.ts_rate_decimator[0] = 2;
+ cfg_.ts_rate_decimator[1] = 1;
+ cfg_.ts_periodicity = 2;
+ // Entry 0 is 60% of rc_target_bitrate; entry 1 equals rc_target_bitrate.
+ cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
+ cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
+
+ init_flags_ = VPX_CODEC_USE_PSNR;
+
+ libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+ timebase.den, timebase.num, 0, 100);
+
+ // Error resilient mode ON.
+ cfg_.g_error_resilient = 1;
+ cfg_.kf_mode = VPX_KF_DISABLED;
+ // From frame 60 onwards SetFrameFlags() selects the flags that neither
+ // reference nor update LAST.
+ SetPatternSwitch(60);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ // Test that no mismatches have been found
+ std::cout << " Mismatch frames: "
+ << GetMismatchFrames() << "\n";
+ EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+
+ // Reset the previously set error/droppable frames; Reset() also clears
+ // pattern_switch_.
+ Reset();
+}
+
class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
protected:
TEST_P(ResizeTest, TestExternalResizeWorks) {
ResizingVideoSource video;
+ cfg_.g_lag_in_frames = 0;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);
EXPECT_EQ(expected_w, info->w)
- << "Frame " << frame << "had unexpected width";
+ << "Frame " << frame << " had unexpected width";
EXPECT_EQ(expected_h, info->h)
- << "Frame " << frame << "had unexpected height";
+ << "Frame " << frame << " had unexpected height";
}
}
}
VP8_INSTANTIATE_TEST_CASE(ResizeTest, ONE_PASS_TEST_MODES);
+VP9_INSTANTIATE_TEST_CASE(ResizeTest,
+ ::testing::Values(::libvpx_test::kRealTime));
VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest,
::testing::Values(::libvpx_test::kOnePassBest));
} // namespace
const vp9_variance_fn_t variance8x8_neon = vp9_variance8x8_neon;
const vp9_variance_fn_t variance16x16_neon = vp9_variance16x16_neon;
const vp9_variance_fn_t variance32x32_neon = vp9_variance32x32_neon;
+const vp9_variance_fn_t variance32x64_neon = vp9_variance32x64_neon;
+const vp9_variance_fn_t variance64x32_neon = vp9_variance64x32_neon;
+const vp9_variance_fn_t variance64x64_neon = vp9_variance64x64_neon;
INSTANTIATE_TEST_CASE_P(
NEON, VP9VarianceTest,
::testing::Values(make_tuple(3, 3, variance8x8_neon, 0),
make_tuple(4, 4, variance16x16_neon, 0),
- make_tuple(5, 5, variance32x32_neon, 0)));
+ make_tuple(5, 5, variance32x32_neon, 0),
+ make_tuple(5, 6, variance32x64_neon, 0),
+ make_tuple(6, 5, variance64x32_neon, 0),
+ make_tuple(6, 6, variance64x64_neon, 0)));
const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
vp9_sub_pixel_variance8x8_neon;
vp9_sub_pixel_variance16x16_neon;
const vp9_subpixvariance_fn_t subpel_variance32x32_neon =
vp9_sub_pixel_variance32x32_neon;
+const vp9_subpixvariance_fn_t subpel_variance64x64_neon =
+ vp9_sub_pixel_variance64x64_neon;
INSTANTIATE_TEST_CASE_P(
NEON, VP9SubpelVarianceTest,
::testing::Values(make_tuple(3, 3, subpel_variance8x8_neon, 0),
make_tuple(4, 4, subpel_variance16x16_neon, 0),
- make_tuple(5, 5, subpel_variance32x32_neon, 0)));
+ make_tuple(5, 5, subpel_variance32x32_neon, 0),
+ make_tuple(6, 6, subpel_variance64x64_neon, 0)));
#endif // HAVE_NEON
#endif // CONFIG_VP9_ENCODER
#endif
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+ NEON, AverageTest,
+ ::testing::Values(
+ make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon),
+ make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),
+ make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon)));
+
+#endif
+
} // namespace
int32_t lossless;
int32_t error_resilient;
int32_t frame_parallel;
+ vpx_color_space_t cs;
// TODO(JBB): quantizers / bitrate
};
const EncodeParameters kVP9EncodeParameterSet[] = {
- {0, 0, 0, 1, 0},
- {0, 0, 0, 0, 0},
- {0, 0, 1, 0, 0},
- {0, 2, 0, 0, 1},
+ {0, 0, 0, 1, 0, VPX_CS_BT_601},
+ {0, 0, 0, 0, 0, VPX_CS_BT_709},
+ {0, 0, 1, 0, 0, VPX_CS_BT_2020},
+ {0, 2, 0, 0, 1, VPX_CS_UNKNOWN},
// TODO(JBB): Test profiles (requires more work).
};
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if (video->frame() == 1) {
+ encoder->Control(VP9E_SET_COLOR_SPACE, encode_parms.cs);
encoder->Control(VP9E_SET_LOSSLESS, encode_parms.lossless);
encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING,
encode_parms.frame_parallel);
EXPECT_EQ(common->frame_parallel_decoding_mode,
encode_parms.frame_parallel);
}
-
+ EXPECT_EQ(common->color_space, encode_parms.cs);
EXPECT_EQ(common->log2_tile_cols, encode_parms.tile_cols);
EXPECT_EQ(common->log2_tile_rows, encode_parms.tile_rows);
The available initialization methods are:
- \if encoder - #vpx_codec_enc_init (calls vpx_codec_enc_init_ver()) \endif
- \if multi-encoder - #vpx_codec_enc_init_multi (calls vpx_codec_enc_init_multi_ver()) \endif
+ \if encoder
+ - #vpx_codec_enc_init (calls vpx_codec_enc_init_ver())
+ - #vpx_codec_enc_init_multi (calls vpx_codec_enc_init_multi_ver())
+ .
+ \endif
\if decoder - #vpx_codec_dec_init (calls vpx_codec_dec_init_ver()) \endif
int ref_frame_map[4];
int sign_bias = 0;
int dot_artifact_candidate = 0;
- // For detecting dot artifact.
- unsigned char* target = x->src.y_buffer;
- unsigned char* target_u = x->block[16].src + *x->block[16].base_src;
- unsigned char* target_v = x->block[20].src + *x->block[20].base_src;
- int stride = x->src.y_stride;
- int stride_uv = x->block[16].src_stride;
+ get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
+
+ // If the current frame is using LAST as a reference, check for
+ // biasing the mode selection for dot artifacts.
+ if (cpi->ref_frame_flags & VP8_LAST_FRAME) {
+ unsigned char* target_y = x->src.y_buffer;
+ unsigned char* target_u = x->block[16].src + *x->block[16].base_src;
+ unsigned char* target_v = x->block[20].src + *x->block[20].base_src;
+ int stride = x->src.y_stride;
+ int stride_uv = x->block[16].src_stride;
#if CONFIG_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity) {
- int uv_denoise = (cpi->oxcf.noise_sensitivity >= 2) ? 1 : 0;
- target =
- cpi->denoiser.yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset;
- stride = cpi->denoiser.yv12_running_avg[LAST_FRAME].y_stride;
- if (uv_denoise) {
- target_u =
- cpi->denoiser.yv12_running_avg[LAST_FRAME].u_buffer + recon_uvoffset;
- target_v =
- cpi->denoiser.yv12_running_avg[LAST_FRAME].v_buffer + recon_uvoffset;
- stride_uv = cpi->denoiser.yv12_running_avg[LAST_FRAME].uv_stride;
+ if (cpi->oxcf.noise_sensitivity) {
+ const int uv_denoise = (cpi->oxcf.noise_sensitivity >= 2) ? 1 : 0;
+ target_y =
+ cpi->denoiser.yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset;
+ stride = cpi->denoiser.yv12_running_avg[LAST_FRAME].y_stride;
+ if (uv_denoise) {
+ target_u =
+ cpi->denoiser.yv12_running_avg[LAST_FRAME].u_buffer +
+ recon_uvoffset;
+ target_v =
+ cpi->denoiser.yv12_running_avg[LAST_FRAME].v_buffer +
+ recon_uvoffset;
+ stride_uv = cpi->denoiser.yv12_running_avg[LAST_FRAME].uv_stride;
+ }
}
- }
#endif
-
- get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset);
-
- dot_artifact_candidate =
- check_dot_artifact_candidate(cpi, x,
- target, stride,
- plane[LAST_FRAME][0], mb_row, mb_col, 0);
- // If not found in Y channel, check UV channel.
- if (!dot_artifact_candidate) {
dot_artifact_candidate =
- check_dot_artifact_candidate(cpi, x,
- target_u, stride_uv,
- plane[LAST_FRAME][1], mb_row, mb_col, 1);
+ check_dot_artifact_candidate(cpi, x, target_y, stride,
+ plane[LAST_FRAME][0], mb_row, mb_col, 0);
+ // If not found in Y channel, check UV channel.
if (!dot_artifact_candidate) {
dot_artifact_candidate =
- check_dot_artifact_candidate(cpi, x,
- target_v, stride_uv,
- plane[LAST_FRAME][2], mb_row, mb_col, 2);
+ check_dot_artifact_candidate(cpi, x, target_u, stride_uv,
+ plane[LAST_FRAME][1], mb_row, mb_col, 1);
+ if (!dot_artifact_candidate) {
+ dot_artifact_candidate =
+ check_dot_artifact_candidate(cpi, x, target_v, stride_uv,
+ plane[LAST_FRAME][2], mb_row, mb_col, 2);
+ }
}
}
} TX_TYPE;
typedef enum {
- UNKNOWN = 0,
- BT_601 = 1, // YUV
- BT_709 = 2, // YUV
- SMPTE_170 = 3, // YUV
- SMPTE_240 = 4, // YUV
- BT_2020 = 5, // YUV
- RESERVED_2 = 6,
- SRGB = 7 // RGB
-} COLOR_SPACE;
-
-typedef enum {
VP9_LAST_FLAG = 1 << 0,
VP9_GOLD_FLAG = 1 << 1,
VP9_ALT_FLAG = 1 << 2,
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-static void filter_block_plane_non420(VP9_COMMON *cm,
- struct macroblockd_plane *plane,
- MODE_INFO *mi_8x8,
- int mi_row, int mi_col) {
+void vp9_filter_block_plane_non420(VP9_COMMON *cm,
+ struct macroblockd_plane *plane,
+ MODE_INFO *mi_8x8,
+ int mi_row, int mi_col) {
const int ss_x = plane->subsampling_x;
const int ss_y = plane->subsampling_y;
const int row_step = 1 << ss_y;
if (use_420)
vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm);
else
- filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
- mi_row, mi_col);
+ vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
+ mi_row, mi_col);
}
}
}
int mi_row,
LOOP_FILTER_MASK *lfm);
+void vp9_filter_block_plane_non420(struct VP9Common *cm,
+ struct macroblockd_plane *plane,
+ MODE_INFO *mi_8x8,
+ int mi_row, int mi_col);
+
void vp9_loop_filter_init(struct VP9Common *cm);
// Update the loop filter for the current frame.
*/
#include "./vpx_config.h"
-
#include "vpx_mem/vpx_mem.h"
-
+#include "vp9/common/vp9_loopfilter_thread.h"
#include "vp9/common/vp9_reconinter.h"
-#include "vp9/decoder/vp9_dthread.h"
-#include "vp9/decoder/vp9_decoder.h"
-
#if CONFIG_MULTITHREAD
static INLINE void mutex_lock(pthread_mutex_t *const mutex) {
const int kMaxTryLocks = 4000;
}
// Implement row loopfiltering for each thread.
-static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
- VP9_COMMON *const cm,
- struct macroblockd_plane planes[MAX_MB_PLANE],
- int start, int stop, int y_only,
- VP9LfSync *const lf_sync) {
+static INLINE
+void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,
+ VP9_COMMON *const cm,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
+ int start, int stop, int y_only,
+ VP9LfSync *const lf_sync) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
- int r, c; // SB row and col
+ const int use_420 = y_only || (planes[1].subsampling_y == 1 &&
+ planes[1].subsampling_x == 1);
const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
+ int mi_row, mi_col;
- for (r = start; r < stop; r += lf_sync->num_workers) {
- const int mi_row = r << MI_BLOCK_SIZE_LOG2;
+ for (mi_row = start; mi_row < stop;
+ mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride;
- for (c = 0; c < sb_cols; ++c) {
- const int mi_col = c << MI_BLOCK_SIZE_LOG2;
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
+ const int r = mi_row >> MI_BLOCK_SIZE_LOG2;
+ const int c = mi_col >> MI_BLOCK_SIZE_LOG2;
LOOP_FILTER_MASK lfm;
int plane;
sync_read(lf_sync, r, c);
vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
- vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
+
+ // TODO(JBB): Make setup_mask work for non 420.
+ if (use_420)
+ vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride,
+ &lfm);
for (plane = 0; plane < num_planes; ++plane) {
- vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm);
+ if (use_420)
+ vp9_filter_block_plane(cm, &planes[plane], mi_row, &lfm);
+ else
+ vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
+ mi_row, mi_col);
}
sync_write(lf_sync, r, c, sb_cols);
// Row-based multi-threaded loopfilter hook
static int loop_filter_row_worker(VP9LfSync *const lf_sync,
LFWorkerData *const lf_data) {
- loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
- lf_data->start, lf_data->stop, lf_data->y_only, lf_sync);
+ thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+ lf_data->start, lf_data->stop, lf_data->y_only,
+ lf_sync);
return 1;
}
-// VP9 decoder: Implement multi-threaded loopfilter that uses the tile
-// threads.
-void vp9_loop_filter_frame_mt(VP9LfSync *lf_sync,
- YV12_BUFFER_CONFIG *frame,
- struct macroblockd_plane planes[MAX_MB_PLANE],
- VP9_COMMON *cm,
- VP9Worker *workers, int nworkers,
- int frame_filter_level,
- int y_only) {
+static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
+ VP9_COMMON *cm,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
+ int start, int stop, int y_only,
+ VP9Worker *workers, int nworkers,
+ VP9LfSync *lf_sync) {
const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
// Number of superblock rows and cols
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+ // Decoder may allocate more threads than number of tiles based on user's
+ // input.
const int tile_cols = 1 << cm->log2_tile_cols;
const int num_workers = MIN(nworkers, tile_cols);
int i;
- if (!frame_filter_level) return;
-
if (!lf_sync->sync_range || cm->last_height != cm->height ||
num_workers > lf_sync->num_workers) {
vp9_loop_filter_dealloc(lf_sync);
vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
}
- vp9_loop_filter_frame_init(cm, frame_filter_level);
-
// Initialize cur_sb_col to -1 for all SB rows.
vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
// Loopfilter data
vp9_loop_filter_data_reset(lf_data, frame, cm, planes);
- lf_data->start = i;
- lf_data->stop = sb_rows;
+ lf_data->start = start + i * MI_BLOCK_SIZE;
+ lf_data->stop = stop;
lf_data->y_only = y_only;
// Start loopfiltering
}
}
+// Multi-threaded loop filter entry point.
+//
+// Returns immediately when frame_filter_level is 0. Otherwise it selects the
+// range of mi rows to filter, initializes the loop filter for the frame with
+// vp9_loop_filter_frame_init() and passes the range, the workers and lf_sync
+// on to loop_filter_rows_mt().
+//
+// Row selection: by default all cm->mi_rows rows are filtered starting at
+// row 0. When partial_frame is non-zero and the frame has more than 8 mi
+// rows, filtering starts at the middle row rounded down to a multiple of 8
+// and covers MAX(cm->mi_rows / 8, 8) rows.
+void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
+ VP9_COMMON *cm,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
+ int frame_filter_level,
+ int y_only, int partial_frame,
+ VP9Worker *workers, int num_workers,
+ VP9LfSync *lf_sync) {
+ int start_mi_row, end_mi_row, mi_rows_to_filter;
+
+ if (!frame_filter_level) return;
+
+ start_mi_row = 0;
+ mi_rows_to_filter = cm->mi_rows;
+ if (partial_frame && cm->mi_rows > 8) {
+ start_mi_row = cm->mi_rows >> 1;
+ // Clear the low three bits: round down to a multiple of 8.
+ start_mi_row &= 0xfffffff8;
+ mi_rows_to_filter = MAX(cm->mi_rows / 8, 8);
+ }
+ // end_mi_row is exclusive.
+ end_mi_row = start_mi_row + mi_rows_to_filter;
+ vp9_loop_filter_frame_init(cm, frame_filter_level);
+
+ loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row,
+ y_only, workers, num_workers, lf_sync);
+}
+
// Set up nsync by width.
-static int get_sync_range(int width) {
+static INLINE int get_sync_range(int width) {
// nsync numbers are picked by testing. For example, for 4k
// video, using 4 gives best performance.
if (width < 640)
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_DECODER_VP9_DTHREAD_H_
-#define VP9_DECODER_VP9_DTHREAD_H_
-
+#ifndef VP9_COMMON_VP9_LOOPFILTER_THREAD_H_
+#define VP9_COMMON_VP9_LOOPFILTER_THREAD_H_
#include "./vpx_config.h"
+#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_thread.h"
-#include "vp9/decoder/vp9_reader.h"
-#include "vpx/internal/vpx_codec_internal.h"
struct VP9Common;
-struct VP9Decoder;
-
-typedef struct TileWorkerData {
- struct VP9Common *cm;
- vp9_reader bit_reader;
- DECLARE_ALIGNED(16, struct macroblockd, xd);
- struct vpx_internal_error_info error_info;
-} TileWorkerData;
// Loopfilter row synchronization
typedef struct VP9LfSyncData {
} VP9LfSync;
// Allocate memory for loopfilter row synchronization.
-void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
+void vp9_loop_filter_alloc(VP9LfSync *lf_sync, struct VP9Common *cm, int rows,
int width, int num_workers);
// Deallocate loopfilter synchronization related mutex and data.
void vp9_loop_filter_dealloc(VP9LfSync *lf_sync);
// Multi-threaded loopfilter that uses the tile threads.
-void vp9_loop_filter_frame_mt(VP9LfSync *lf_sync,
- YV12_BUFFER_CONFIG *frame,
- struct macroblockd_plane planes[MAX_MB_PLANE],
+void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
struct VP9Common *cm,
- VP9Worker *workers, int num_workers,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
int frame_filter_level,
- int y_only);
+ int y_only, int partial_frame,
+ VP9Worker *workers, int num_workers,
+ VP9LfSync *lf_sync);
-#endif // VP9_DECODER_VP9_DTHREAD_H_
+#endif // VP9_COMMON_VP9_LOOPFILTER_THREAD_H_
DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]);
- COLOR_SPACE color_space;
+ vpx_color_space_t color_space;
int width;
int height;
specialize qw/vp9_variance16x32/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x32 avx2/, "$sse2_x86inc";
+specialize qw/vp9_variance64x32 avx2 neon/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance32x64/, "$sse2_x86inc";
+specialize qw/vp9_variance32x64 neon/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance32x32 avx2 neon/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x64 avx2/, "$sse2_x86inc";
+specialize qw/vp9_variance64x64 avx2 neon/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x16 avx2 neon/, "$sse2_x86inc";
specialize qw/vp9_variance4x4/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_sub_pixel_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
+specialize qw/vp9_sub_pixel_variance64x64 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance64x64 avx2/, "$sse2_x86inc", "$ssse3_x86inc";
specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
-specialize qw/vp9_avg_8x8 sse2/;
+specialize qw/vp9_avg_8x8 sse2 neon/;
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_avg_4x4 sse2/;
#include "vp9/decoder/vp9_decodemv.h"
#include "vp9/decoder/vp9_decoder.h"
#include "vp9/decoder/vp9_dsubexp.h"
-#include "vp9/decoder/vp9_dthread.h"
#include "vp9/decoder/vp9_read_bit_buffer.h"
#include "vp9/decoder/vp9_reader.h"
}
cm->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x;
cm->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y;
+ cm->frame_bufs[cm->new_fb_idx].buf.color_space =
+ (vpx_color_space_t)cm->color_space;
cm->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth;
}
cm->use_highbitdepth = 0;
#endif
}
- cm->color_space = (COLOR_SPACE)vp9_rb_read_literal(rb, 3);
- if (cm->color_space != SRGB) {
+ cm->color_space = vp9_rb_read_literal(rb, 3);
+ if (cm->color_space != VPX_CS_SRGB) {
vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range
if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
cm->subsampling_x = vp9_rb_read_bit(rb);
// of either the color format or color sub-sampling in profile 0. VP9
// specifies that the default color format should be YUV 4:2:0 in this
// case (normative).
- cm->color_space = BT_601;
+ cm->color_space = VPX_CS_BT_601;
cm->subsampling_y = cm->subsampling_x = 1;
cm->bit_depth = VPX_BITS_8;
#if CONFIG_VP9_HIGHBITDEPTH
if (!xd->corrupted) {
// If multiple threads are used to decode tiles, then we use those threads
// to do parallel loopfiltering.
- vp9_loop_filter_frame_mt(&pbi->lf_row_sync, new_fb, pbi->mb.plane, cm,
- pbi->tile_workers, pbi->num_tile_workers,
- cm->lf.filter_level, 0);
+ vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
+ 0, 0, pbi->tile_workers, pbi->num_tile_workers,
+ &pbi->lf_row_sync);
} else {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Decode failed. Frame data is corrupted.");
#include "vp9/decoder/vp9_decodeframe.h"
#include "vp9/decoder/vp9_decoder.h"
#include "vp9/decoder/vp9_detokenize.h"
-#include "vp9/decoder/vp9_dthread.h"
static void initialize_dec(void) {
static volatile int init_done = 0;
#include "vpx/vpx_codec.h"
#include "vpx_scale/yv12config.h"
-
+#include "vp9/common/vp9_loopfilter_thread.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_ppflags.h"
#include "vp9/common/vp9_thread.h"
-
-#include "vp9/decoder/vp9_dthread.h"
+#include "vp9/decoder/vp9_reader.h"
#ifdef __cplusplus
extern "C" {
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
} TileData;
+// State bundle for a tile worker: a pointer to the common codec state, a bit
+// reader, a 16-byte aligned MACROBLOCKD and an error-info record.
+// NOTE(review): presumably one instance per tile-decoding thread - the code
+// that allocates and uses it is not visible here; confirm.
+typedef struct TileWorkerData {
+ VP9_COMMON *cm;
+ vp9_reader bit_reader;
+ DECLARE_ALIGNED(16, MACROBLOCKD, xd);
+ struct vpx_internal_error_info error_info;
+} TileWorkerData;
+
typedef struct VP9Decoder {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
--- /dev/null
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+
+#include "vpx/vpx_integer.h"
+
+// Returns the sum of the eight unsigned 16-bit lanes of v_16x8.
+//
+// The lanes are reduced with two widening pairwise adds (8 x u16 -> 4 x u32
+// -> 2 x u64). The total is at most 8 * 65535, so each 64-bit partial sum
+// fits in 32 bits; the two halves are then added as 32-bit vectors and the
+// result is read from lane 0.
+static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {
+ const uint32x4_t a = vpaddlq_u16(v_16x8);
+ const uint64x2_t b = vpaddlq_u32(a);
+ const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
+ vreinterpret_u32_u64(vget_high_u64(b)));
+ return vget_lane_u32(c, 0);
+}
+
+// Returns the rounded average of an 8x8 block of bytes.
+//
+// s points at the first byte of the block and p is the distance in bytes
+// between the starts of consecutive rows. Eight rows of eight bytes are
+// loaded (s, s + p, ..., s + 7 * p) and accumulated column-wise into eight
+// 16-bit lanes; each lane holds at most 8 * 255, so it cannot overflow. The
+// 64 values are then summed and the result is (sum + 32) >> 6, i.e. the mean
+// rounded to nearest.
+unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
+ uint8x8_t v_s0 = vld1_u8(s);
+ const uint8x8_t v_s1 = vld1_u8(s + p);
+ // Widening add of rows 0 and 1 starts the 16-bit accumulator.
+ uint16x8_t v_sum = vaddl_u8(v_s0, v_s1);
+
+ v_s0 = vld1_u8(s + 2 * p);
+ v_sum = vaddw_u8(v_sum, v_s0);
+
+ v_s0 = vld1_u8(s + 3 * p);
+ v_sum = vaddw_u8(v_sum, v_s0);
+
+ v_s0 = vld1_u8(s + 4 * p);
+ v_sum = vaddw_u8(v_sum, v_s0);
+
+ v_s0 = vld1_u8(s + 5 * p);
+ v_sum = vaddw_u8(v_sum, v_s0);
+
+ v_s0 = vld1_u8(s + 6 * p);
+ v_sum = vaddw_u8(v_sum, v_s0);
+
+ v_s0 = vld1_u8(s + 7 * p);
+ v_sum = vaddw_u8(v_sum, v_s0);
+
+ // Add half the divisor (32) before dividing by 64 to round to nearest.
+ return (horizontal_add_u16x8(v_sum) + 32) >> 6;
+}
#include <arm_neon.h>
#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
enum { kWidth32 = 32 };
enum { kHeight32 = 32 };
enum { kHeight32PlusOne = 33 };
+enum { kWidth64 = 64 };
+enum { kHeight64 = 64 };
+enum { kHeight64PlusOne = 65 };
enum { kPixelStepOne = 1 };
enum { kAlign16 = 16 };
return vget_lane_s32(c, 0);
}
+// w * h must be less than 2048 or local variable v_sum may overflow.
static void variance_neon_w8(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
- int w, int h, unsigned int *sse, int *sum) {
+ int w, int h, uint32_t *sse, int *sum) {
int i, j;
int16x8_t v_sum = vdupq_n_s16(0);
int32x4_t v_sse_lo = vdupq_n_s32(0);
unsigned int *sse) {
int sum;
variance_neon_w8(a, a_stride, b, b_stride, kWidth8, kHeight8, sse, &sum);
- return *sse - (((int64_t)sum * sum) / (kWidth8 * kHeight8));
+ return *sse - (((int64_t)sum * sum) >> 6); // >> 6 = / 8 * 8
}
void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride,
unsigned int *sse) {
int sum;
variance_neon_w8(a, a_stride, b, b_stride, kWidth16, kHeight16, sse, &sum);
- return *sse - (((int64_t)sum * sum) / (kWidth16 * kHeight16));
+ return *sse - (((int64_t)sum * sum) >> 8); // >> 8 = / 16 * 16
}
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
unsigned int *sse) {
int sum;
variance_neon_w8(a, a_stride, b, b_stride, kWidth32, kHeight32, sse, &sum);
- return *sse - (((int64_t)sum * sum) / (kWidth32 * kHeight32));
+ return *sse - (((int64_t)sum * sum) >> 10); // >> 10 = / 32 * 32
+}
+
+unsigned int vp9_variance32x64_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum1, sum2;
+ uint32_t sse1, sse2;
+ variance_neon_w8(a, a_stride, b, b_stride, kWidth32, kHeight32, &sse1, &sum1);
+ variance_neon_w8(a + (kHeight32 * a_stride), a_stride,
+ b + (kHeight32 * b_stride), b_stride, kWidth32, kHeight32,
+ &sse2, &sum2);
+ *sse = sse1 + sse2;
+ sum1 += sum2;
+ return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64
+}
+
+unsigned int vp9_variance64x32_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum1, sum2;
+ uint32_t sse1, sse2;
+ variance_neon_w8(a, a_stride, b, b_stride, kWidth64, kHeight16, &sse1, &sum1);
+ variance_neon_w8(a + (kHeight16 * a_stride), a_stride,
+ b + (kHeight16 * b_stride), b_stride, kWidth64, kHeight16,
+ &sse2, &sum2);
+ *sse = sse1 + sse2;
+ sum1 += sum2;
+ return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64
+}
+
+unsigned int vp9_variance64x64_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum1, sum2;
+ uint32_t sse1, sse2;
+
+ variance_neon_w8(a, a_stride, b, b_stride, kWidth64, kHeight16, &sse1, &sum1);
+ variance_neon_w8(a + (kHeight16 * a_stride), a_stride,
+ b + (kHeight16 * b_stride), b_stride, kWidth64, kHeight16,
+ &sse2, &sum2);
+ sse1 += sse2;
+ sum1 += sum2;
+
+ variance_neon_w8(a + (kHeight16 * 2 * a_stride), a_stride,
+ b + (kHeight16 * 2 * b_stride), b_stride,
+ kWidth64, kHeight16, &sse2, &sum2);
+ sse1 += sse2;
+ sum1 += sum2;
+
+ variance_neon_w8(a + (kHeight16 * 3 * a_stride), a_stride,
+ b + (kHeight16 * 3 * b_stride), b_stride,
+ kWidth64, kHeight16, &sse2, &sum2);
+ *sse = sse1 + sse2;
+ sum1 += sum2;
+ return *sse - (((int64_t)sum1 * sum1) >> 12); // >> 12 = / 64 * 64
}
unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
kWidth32, BILINEAR_FILTERS_2TAP(yoffset));
return vp9_variance32x32_neon(temp2, kWidth32, dst, dst_stride, sse);
}
+
+unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src,
+ int src_stride,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst,
+ int dst_stride,
+ unsigned int *sse) {
+ DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, temp2, kHeight64 * kWidth64);
+ DECLARE_ALIGNED_ARRAY(kAlign16, uint8_t, fdata3, kHeight64PlusOne * kWidth64);
+
+ var_filter_block2d_bil_w16(src, fdata3, src_stride, kPixelStepOne,
+ kHeight64PlusOne, kWidth64,
+ BILINEAR_FILTERS_2TAP(xoffset));
+ var_filter_block2d_bil_w16(fdata3, temp2, kWidth64, kWidth64, kHeight64,
+ kWidth64, BILINEAR_FILTERS_2TAP(yoffset));
+ return vp9_variance64x64_neon(temp2, kWidth64, dst, dst_stride, sse);
+}
vp9_wb_write_bit(wb, cm->bit_depth == VPX_BITS_10 ? 0 : 1);
}
vp9_wb_write_literal(wb, cm->color_space, 3);
- if (cm->color_space != SRGB) {
+ if (cm->color_space != VPX_CS_SRGB) {
vp9_wb_write_bit(wb, 0); // 0: [16, 235] (i.e. xvYCC), 1: [0, 255]
if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) {
assert(cm->subsampling_x != 1 || cm->subsampling_y != 1);
int r;
const uint8_t *srcbuf = src.y_buffer;
uint8_t *destbuf = dest.y_buffer;
+
assert(dest.y_width == src.y_width);
assert(dest.y_height == src.y_height);
}
}
-static void swap_frame_buffer(YV12_BUFFER_CONFIG dest,
- YV12_BUFFER_CONFIG src) {
- uint8_t *tmp_buf = dest.y_buffer;
- assert(dest.y_width == src.y_width);
- assert(dest.y_height == src.y_height);
- dest.y_buffer = src.y_buffer;
- src.y_buffer = tmp_buf;
+static void swap_frame_buffer(YV12_BUFFER_CONFIG *dest,
+ YV12_BUFFER_CONFIG *src) {
+ uint8_t *tmp_buf = dest->y_buffer;
+ assert(dest->y_width == src->y_width);
+ assert(dest->y_height == src->y_height);
+ dest->y_buffer = src->y_buffer;
+ src->y_buffer = tmp_buf;
}
void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
/* For non key frames */
if (refresh_alt_ref_frame) {
- swap_frame_buffer(denoiser->running_avg_y[ALTREF_FRAME],
- denoiser->running_avg_y[INTRA_FRAME]);
+ swap_frame_buffer(&denoiser->running_avg_y[ALTREF_FRAME],
+ &denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_golden_frame) {
- swap_frame_buffer(denoiser->running_avg_y[GOLDEN_FRAME],
- denoiser->running_avg_y[INTRA_FRAME]);
+ swap_frame_buffer(&denoiser->running_avg_y[GOLDEN_FRAME],
+ &denoiser->running_avg_y[INTRA_FRAME]);
}
if (refresh_last_frame) {
- swap_frame_buffer(denoiser->running_avg_y[LAST_FRAME],
- denoiser->running_avg_y[INTRA_FRAME]);
+ swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME],
+ &denoiser->running_avg_y[INTRA_FRAME]);
}
}
return;
}
for (i = 0; i < MAX_REF_FRAMES; ++i) {
- if (&denoiser->running_avg_y[i] != NULL) {
- vp9_free_frame_buffer(&denoiser->running_avg_y[i]);
- }
- }
- if (&denoiser->mc_running_avg_y != NULL) {
- vp9_free_frame_buffer(&denoiser->mc_running_avg_y);
+ vp9_free_frame_buffer(&denoiser->running_avg_y[i]);
}
+ vp9_free_frame_buffer(&denoiser->mc_running_avg_y);
}
#ifdef OUTPUT_YUV_DENOISED
void *data,
BLOCK_SIZE bsize,
int mi_row,
- int mi_col) {
+ int mi_col,
+ int threshold,
+ BLOCK_SIZE bsize_min) {
VP9_COMMON * const cm = &cpi->common;
variance_node vt;
const int block_width = num_8x8_blocks_wide_lookup[bsize];
const int block_height = num_8x8_blocks_high_lookup[bsize];
- // TODO(marpan): Adjust/tune these thresholds.
- const int threshold_multiplier = cm->frame_type == KEY_FRAME ? 80 : 4;
- int64_t threshold =
- (int64_t)(threshold_multiplier *
- vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth));
- int64_t threshold_bsize_ref = threshold << 6;
- int64_t threshold_low = threshold;
- BLOCK_SIZE bsize_ref = BLOCK_16X16;
assert(block_height == block_width);
tree_to_node(data, bsize, &vt);
- if (cm->frame_type == KEY_FRAME) {
- bsize_ref = BLOCK_8X8;
- // Choose lower thresholds for key frame variance to favor split, but keep
- // threshold for splitting to 4x4 block still fairly high for now.
- threshold_bsize_ref = threshold << 2;
- threshold_low = threshold >> 2;
- }
-
- // For bsize=bsize_ref (16x16/8x8 for 8x8/4x4 downsampling), select if
+ // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
// variance is below threshold, otherwise split will be selected.
// No check for vert/horiz split as too few samples for variance.
- if (bsize == bsize_ref) {
+ if (bsize == bsize_min) {
get_variance(&vt.part_variances->none);
if (mi_col + block_width / 2 < cm->mi_cols &&
mi_row + block_height / 2 < cm->mi_rows &&
- vt.part_variances->none.variance < threshold_bsize_ref) {
+ vt.part_variances->none.variance < threshold) {
set_block_size(cpi, xd, mi_row, mi_col, bsize);
return 1;
}
return 0;
- } else if (bsize > bsize_ref) {
+ } else if (bsize > bsize_min) {
get_variance(&vt.part_variances->none);
- // For key frame, for bsize above 32X32, or very high variance, take split.
+ // For key frame: for bsize above 32X32 or very high variance,
+ // take split.
if (cm->frame_type == KEY_FRAME &&
(bsize > BLOCK_32X32 ||
- vt.part_variances->none.variance > (threshold << 2))) {
+ vt.part_variances->none.variance > (threshold << 4))) {
return 0;
}
// If variance is low, take the bsize (no split).
if (mi_col + block_width / 2 < cm->mi_cols &&
mi_row + block_height / 2 < cm->mi_rows &&
- vt.part_variances->none.variance < threshold_low) {
+ vt.part_variances->none.variance < threshold) {
set_block_size(cpi, xd, mi_row, mi_col, bsize);
return 1;
}
if (mi_row + block_height / 2 < cm->mi_rows) {
get_variance(&vt.part_variances->vert[0]);
get_variance(&vt.part_variances->vert[1]);
- if (vt.part_variances->vert[0].variance < threshold_low &&
- vt.part_variances->vert[1].variance < threshold_low) {
+ if (vt.part_variances->vert[0].variance < threshold &&
+ vt.part_variances->vert[1].variance < threshold) {
BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
set_block_size(cpi, xd, mi_row, mi_col, subsize);
set_block_size(cpi, xd, mi_row, mi_col + block_width / 2, subsize);
if (mi_col + block_width / 2 < cm->mi_cols) {
get_variance(&vt.part_variances->horz[0]);
get_variance(&vt.part_variances->horz[1]);
- if (vt.part_variances->horz[0].variance < threshold_low &&
- vt.part_variances->horz[1].variance < threshold_low) {
+ if (vt.part_variances->horz[0].variance < threshold &&
+ vt.part_variances->horz[1].variance < threshold) {
BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
set_block_size(cpi, xd, mi_row, mi_col, subsize);
set_block_size(cpi, xd, mi_row + block_height / 2, mi_col, subsize);
}
// This function chooses partitioning based on the variance between source and
-// reconstructed last, where variance is computed for downsampled inputs.
-// Currently 8x8 downsampling is used for delta frames, 4x4 for key frames.
+// reconstructed last, where variance is computed for down-sampled inputs.
static void choose_partitioning(VP9_COMP *cpi,
const TileInfo *const tile,
MACROBLOCK *x,
int i, j, k, m;
v64x64 vt;
+ v16x16 vt2[16];
uint8_t *s;
const uint8_t *d;
int sp;
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
const struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
+ // Always use 4x4 partition for key frame.
+ int use_4x4_partition = (cm->frame_type == KEY_FRAME);
+
+ int variance4x4downsample[16];
+ int low_res = (cm->width <= 352 && cm->height <= 288) ? 1 : 0;
+ const int threshold_multiplier = cm->frame_type == KEY_FRAME ? 80 : 4;
+ int64_t threshold_base = (int64_t)(threshold_multiplier *
+ vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth));
+ int64_t threshold = threshold_base;
+ int64_t threshold_bsize_min = threshold_base << 6;
+ int64_t threshold_bsize_max = threshold_base;
+ // Modify thresholds for key frame and for low-resolutions (set lower
+ // thresholds to favor split).
+ if (cm->frame_type == KEY_FRAME) {
+ threshold = threshold_base >> 2;
+ threshold_bsize_min = threshold_base << 2;
+ } else if (low_res) {
+ threshold_bsize_min = threshold_base << 3;
+ threshold_bsize_max = threshold_base >> 2;
+ }
+
vp9_clear_system_state();
set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
- // Fill in the entire tree of 8x8 variances for splits.
+ // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
+ // for splits.
for (i = 0; i < 4; i++) {
const int x32_idx = ((i & 1) << 5);
const int y32_idx = ((i >> 1) << 5);
+ const int i2 = i << 2;
for (j = 0; j < 4; j++) {
const int x16_idx = x32_idx + ((j & 1) << 4);
const int y16_idx = y32_idx + ((j >> 1) << 4);
v16x16 *vst = &vt.split[i].split[j];
- for (k = 0; k < 4; k++) {
- int x8_idx = x16_idx + ((k & 1) << 3);
- int y8_idx = y16_idx + ((k >> 1) << 3);
- if (cm->frame_type != KEY_FRAME) {
- unsigned int sse = 0;
- int sum = 0;
- if (x8_idx < pixels_wide && y8_idx < pixels_high) {
- int s_avg, d_avg;
+ variance4x4downsample[i2 + j] = 0;
+ if (cm->frame_type != KEY_FRAME) {
+ for (k = 0; k < 4; k++) {
+ int x8_idx = x16_idx + ((k & 1) << 3);
+ int y8_idx = y16_idx + ((k >> 1) << 3);
+ unsigned int sse = 0;
+ int sum = 0;
+ if (x8_idx < pixels_wide && y8_idx < pixels_high) {
+ int s_avg, d_avg;
#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- s_avg = vp9_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
- d_avg = vp9_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
- } else {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ s_avg = vp9_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+ d_avg = vp9_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+ } else {
+ s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp);
+ d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp);
+ }
+#else
s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp);
d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp);
- }
-#else
- s_avg = vp9_avg_8x8(s + y8_idx * sp + x8_idx, sp);
- d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp);
#endif
- sum = s_avg - d_avg;
- sse = sum * sum;
- }
- // If variance is based on 8x8 downsampling, we stop here and have
- // one sample for 8x8 block (so use 1 for count in fill_variance),
- // which of course means variance = 0 for 8x8 block.
- fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
- } else {
- // For key frame, go down to 4x4.
- v8x8 *vst2 = &vst->split[k];
+ sum = s_avg - d_avg;
+ sse = sum * sum;
+ }
+ // If variance is based on 8x8 downsampling, we stop here and have
+ // one sample for 8x8 block (so use 1 for count in fill_variance),
+ // which of course means variance = 0 for 8x8 block.
+ fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
+ }
+ fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
+ // For low-resolution, compute the variance based on 8x8 down-sampling,
+ // and if it is large (above the threshold) we go down for 4x4.
+ // For key frame we always go down to 4x4.
+ if (low_res)
+ get_variance(&vt.split[i].split[j].part_variances.none);
+ }
+ if (cm->frame_type == KEY_FRAME || (low_res &&
+ vt.split[i].split[j].part_variances.none.variance >
+ (threshold << 1))) {
+ // Go down to 4x4 down-sampling for variance.
+ variance4x4downsample[i2 + j] = 1;
+ for (k = 0; k < 4; k++) {
+ int x8_idx = x16_idx + ((k & 1) << 3);
+ int y8_idx = y16_idx + ((k >> 1) << 3);
+ v8x8 *vst2 = (cm->frame_type == KEY_FRAME) ? &vst->split[k] :
+ &vt2[i2 + j].split[k];
for (m = 0; m < 4; m++) {
int x4_idx = x8_idx + ((m & 1) << 2);
int y4_idx = y8_idx + ((m >> 1) << 2);
unsigned int sse = 0;
int sum = 0;
if (x4_idx < pixels_wide && y4_idx < pixels_high) {
+ int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
int s_avg;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
s_avg = vp9_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ if (cm->frame_type != KEY_FRAME)
+ d_avg = vp9_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
} else {
s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ if (cm->frame_type != KEY_FRAME)
+ d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp);
}
#else
int s_avg = vp9_avg_4x4(s + y4_idx * sp + x4_idx, sp);
+ if (cm->frame_type != KEY_FRAME)
+ d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp);
#endif
- // For key frame, reference is set to 128.
- sum = s_avg - 128;
+ sum = s_avg - d_avg;
sse = sum * sum;
}
- // If variance is based on 4x4 downsampling, we stop here and have
+ // If variance is based on 4x4 down-sampling, we stop here and have
// one sample for 4x4 block (so use 1 for count in fill_variance),
// which of course means variance = 0 for 4x4 block.
- fill_variance(sse, sum, 0, &vst2->split[m].part_variances.none);
+ fill_variance(sse, sum, 0, &vst2->split[m].part_variances.none);
}
}
}
}
}
+
// Fill the rest of the variance tree by summing split partition values.
for (i = 0; i < 4; i++) {
+ const int i2 = i << 2;
for (j = 0; j < 4; j++) {
- if (cm->frame_type == KEY_FRAME) {
+ if (variance4x4downsample[i2 + j] == 1) {
+ v16x16 *vtemp = (cm->frame_type != KEY_FRAME) ? &vt2[i2 + j] :
+ &vt.split[i].split[j];
for (m = 0; m < 4; m++) {
- fill_variance_tree(&vt.split[i].split[j].split[m], BLOCK_8X8);
+ fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
}
+ fill_variance_tree(vtemp, BLOCK_16X16);
}
- fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
}
fill_variance_tree(&vt.split[i], BLOCK_32X32);
}
fill_variance_tree(&vt, BLOCK_64X64);
+
// Now go through the entire structure, splitting every block size until
// we get to one that's got a variance lower than our threshold.
if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
- !set_vt_partitioning(cpi, xd, &vt, BLOCK_64X64, mi_row, mi_col)) {
+ !set_vt_partitioning(cpi, xd, &vt, BLOCK_64X64, mi_row, mi_col,
+ threshold_bsize_max, BLOCK_16X16)) {
for (i = 0; i < 4; ++i) {
const int x32_idx = ((i & 1) << 2);
const int y32_idx = ((i >> 1) << 2);
+ const int i2 = i << 2;
if (!set_vt_partitioning(cpi, xd, &vt.split[i], BLOCK_32X32,
- (mi_row + y32_idx), (mi_col + x32_idx))) {
+ (mi_row + y32_idx), (mi_col + x32_idx),
+ threshold, BLOCK_16X16)) {
for (j = 0; j < 4; ++j) {
const int x16_idx = ((j & 1) << 1);
const int y16_idx = ((j >> 1) << 1);
- // Note: If 8x8 downsampling is used for variance calculation we
- // cannot really select block size 8x8 (or even 8x16/16x8), since we
- // don't have sufficient samples for variance. So on delta frames,
- // 8x8 partition is only set if variance of the 16x16 block is very
- // high. For key frames, 4x4 downsampling is used, so we can better
- // select 8x16/16x8 and 8x8. 4x4 partition can potentially be set
- // used here too, but for now 4x4 is not allowed.
- if (!set_vt_partitioning(cpi, xd, &vt.split[i].split[j],
- BLOCK_16X16,
+ // TODO(marpan): Allow 4x4 partitions for inter-frames.
+ // use_4x4_partition = (variance4x4downsample[i2 + j] == 1);
+ // If 4x4 partition is not used, then 8x8 partition will be selected
+ // if variance of 16x16 block is very high, so use larger threshold
+ // for 16x16 (threshold_bsize_min) in that case.
+ uint64_t threshold_16x16 = (use_4x4_partition) ? threshold :
+ threshold_bsize_min;
+ BLOCK_SIZE bsize_min = (use_4x4_partition) ? BLOCK_8X8 : BLOCK_16X16;
+ // For inter frames: if variance4x4downsample[] == 1 for this 16x16
+ // block, then the variance is based on 4x4 down-sampling, so use vt2
+ // in set_vt_partitioning(), otherwise use vt.
+ v16x16 *vtemp = (cm->frame_type != KEY_FRAME &&
+ variance4x4downsample[i2 + j] == 1) ?
+ &vt2[i2 + j] : &vt.split[i].split[j];
+ if (!set_vt_partitioning(cpi, xd, vtemp, BLOCK_16X16,
mi_row + y32_idx + y16_idx,
- mi_col + x32_idx + x16_idx)) {
+ mi_col + x32_idx + x16_idx,
+ threshold_16x16, bsize_min)) {
for (k = 0; k < 4; ++k) {
const int x8_idx = (k & 1);
const int y8_idx = (k >> 1);
- if (cm->frame_type == KEY_FRAME) {
- if (!set_vt_partitioning(cpi, xd,
- &vt.split[i].split[j].split[k],
+ if (use_4x4_partition) {
+ if (!set_vt_partitioning(cpi, xd, &vtemp->split[k],
BLOCK_8X8,
mi_row + y32_idx + y16_idx + y8_idx,
- mi_col + x32_idx + x16_idx + x8_idx)) {
- set_block_size(cpi, xd,
- (mi_row + y32_idx + y16_idx + y8_idx),
- (mi_col + x32_idx + x16_idx + x8_idx),
- BLOCK_4X4);
+ mi_col + x32_idx + x16_idx + x8_idx,
+ threshold_bsize_min, BLOCK_8X8)) {
+ set_block_size(cpi, xd,
+ (mi_row + y32_idx + y16_idx + y8_idx),
+ (mi_col + x32_idx + x16_idx + x8_idx),
+ BLOCK_4X4);
}
} else {
set_block_size(cpi, xd,
(mi_row + y32_idx + y16_idx + y8_idx),
(mi_col + x32_idx + x16_idx + x8_idx),
BLOCK_8X8);
- }
+ }
}
}
}
#if CONFIG_VP9_HIGHBITDEPTH
cm->use_highbitdepth = oxcf->use_highbitdepth;
#endif
- cm->color_space = UNKNOWN;
+ cm->color_space = oxcf->color_space;
cm->width = oxcf->width;
cm->height = oxcf->height;
if (cm->profile != oxcf->profile)
cm->profile = oxcf->profile;
cm->bit_depth = oxcf->bit_depth;
+ cm->color_space = oxcf->color_space;
if (cm->profile <= PROFILE_1)
assert(cm->bit_depth == VPX_BITS_8);
cm->display_width = cpi->oxcf.width;
cm->display_height = cpi->oxcf.height;
+ cm->width = cpi->oxcf.width;
+ cm->height = cpi->oxcf.height;
if (cpi->initial_width) {
// Increasing the size of the frame beyond the first seen frame, or some
for (t = 0; t < cpi->num_workers; ++t) {
VP9Worker *const worker = &cpi->workers[t];
- EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
+ EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
// Deallocate allocated threads.
vp9_get_worker_interface()->end(worker);
vp9_free_pc_tree(thread_data->td);
vpx_free(thread_data->td);
}
-
- vpx_free(worker->data1);
}
+ vpx_free(cpi->tile_thr_data);
vpx_free(cpi->workers);
+ if (cpi->num_workers > 1)
+ vp9_loop_filter_dealloc(&cpi->lf_row_sync);
+
dealloc_compressor_data(cpi);
for (i = 0; i < sizeof(cpi->mbgraph_stats) /
}
if (lf->filter_level > 0) {
- vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
+ if (cpi->num_workers > 1)
+ vp9_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane,
+ lf->filter_level, 0, 0,
+ cpi->workers, cpi->num_workers,
+ &cpi->lf_row_sync);
+ else
+ vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
}
vp9_extend_frame_inner_borders(cm->frame_to_show);
#include "vp9/common/vp9_ppflags.h"
#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_loopfilter_thread.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_thread.h"
#include "vp9/encoder/vp9_svc_layercontext.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_variance.h"
+
#if CONFIG_VP9_TEMPORAL_DENOISING
#include "vp9/encoder/vp9_denoiser.h"
#endif
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth;
#endif
+ vpx_color_space_t color_space;
} VP9EncoderConfig;
static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
PC_TREE *pc_root;
} ThreadData;
+struct EncWorkerData;
+
typedef struct VP9_COMP {
QUANTS quants;
ThreadData td;
// Multi-threading
int num_workers;
VP9Worker *workers;
+ struct EncWorkerData *tile_thr_data;
+ VP9LfSync lf_row_sync;
} VP9_COMP;
void vp9_initialize_enc(void);
CHECK_MEM_ERROR(cm, cpi->workers,
vpx_malloc(num_workers * sizeof(*cpi->workers)));
+ CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
+ vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
+
for (i = 0; i < num_workers; i++) {
VP9Worker *const worker = &cpi->workers[i];
- EncWorkerData *thread_data;
+ EncWorkerData *thread_data = &cpi->tile_thr_data[i];
++cpi->num_workers;
-
winterface->init(worker);
- CHECK_MEM_ERROR(cm, worker->data1,
- (EncWorkerData*)vpx_calloc(1, sizeof(EncWorkerData)));
- thread_data = (EncWorkerData*)worker->data1;
if (i < num_workers - 1) {
thread_data->cpi = cpi;
// Allocate thread data.
CHECK_MEM_ERROR(cm, thread_data->td,
- vpx_calloc(1, sizeof(*thread_data->td)));
+ vpx_memalign(32, sizeof(*thread_data->td)));
+ vp9_zero(*thread_data->td);
+
// Set up pc_tree.
thread_data->td->leaf_tree = NULL;
thread_data->td->pc_tree = NULL;
thread_data->td = &cpi->td;
}
- // data2 is unused.
- worker->data2 = NULL;
-
winterface->sync(worker);
- worker->hook = (VP9WorkerHook)enc_worker_hook;
}
}
for (i = 0; i < num_workers; i++) {
VP9Worker *const worker = &cpi->workers[i];
- EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
+ EncWorkerData *thread_data;
+
+ worker->hook = (VP9WorkerHook)enc_worker_hook;
+ worker->data1 = &cpi->tile_thr_data[i];
+ worker->data2 = NULL;
+ thread_data = (EncWorkerData*)worker->data1;
// Before encoding a frame, copy the thread data from cpi.
thread_data->td->mb = cpi->td.mb;
twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0);
+ // Was the group length constrained by the requirement for a new KF?
+ rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
+
// Set the interval until the next gf.
if (cpi->common.frame_type == KEY_FRAME || rc->source_alt_ref_active)
rc->baseline_gf_interval = i - 1;
VP9_COMMON *const cm = &cpi->common;
int64_t filt_err;
- vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, 1,
- partial_frame);
+ if (cpi->num_workers > 1)
+ vp9_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane,
+ filt_level, 1, partial_frame,
+ cpi->workers, cpi->num_workers, &cpi->lf_row_sync);
+ else
+ vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
+ 1, partial_frame);
+
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show);
// better than that already stored.
// This is used to help set quality in forced key frames to reduce popping
if ((qindex < rc->last_boosted_qindex) ||
- (((cm->frame_type == KEY_FRAME) || cpi->refresh_alt_ref_frame ||
+ (cm->frame_type == KEY_FRAME) ||
+ (!rc->constrained_gf_group &&
+ (cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
rc->last_boosted_qindex = qindex;
}
rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
// NOTE: frames_till_gf_update_due must be <= frames_to_key.
- if (rc->frames_till_gf_update_due > rc->frames_to_key)
+ if (rc->frames_till_gf_update_due > rc->frames_to_key) {
rc->frames_till_gf_update_due = rc->frames_to_key;
+ rc->constrained_gf_group = 1;
+ } else {
+ rc->constrained_gf_group = 0;
+ }
cpi->refresh_golden_frame = 1;
rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
rc->gfu_boost = DEFAULT_GF_BOOST;
int max_gf_interval;
int static_scene_max_gf_interval;
int baseline_gf_interval;
+ int constrained_gf_group;
int frames_to_key;
int frames_since_key;
int this_key_frame_forced;
rd->thresh_mult[THR_NEWA] += 1000;
rd->thresh_mult[THR_NEWG] += 1000;
- // Adjust threshold only in real time mode, which only uses last
- // reference frame.
- rd->thresh_mult[THR_NEWMV] += sf->elevate_newmv_thresh;
-
rd->thresh_mult[THR_NEARMV] += 1000;
rd->thresh_mult[THR_NEARA] += 1000;
rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
sf->always_this_block_size = BLOCK_16X16;
sf->search_type_check_frequency = 50;
sf->encode_breakout_thresh = 0;
- sf->elevate_newmv_thresh = 0;
// Recode loop tolerance %.
sf->recode_tolerance = 25;
sf->default_interp_filter = SWITCHABLE;
// enabled in real time mode.
int encode_breakout_thresh;
- // In real time encoding, increase the threshold for NEWMV.
- int elevate_newmv_thresh;
-
// default interp filter choice
INTERP_FILTER default_interp_filter;
int vp9_svc_start_frame(VP9_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc;
+ struct lookahead_entry *buf;
int count = 1 << (cpi->svc.number_temporal_layers - 1);
cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
// since its previous frame could be changed during decoding time. The idea is
// we put a empty invisible frame in front of them, then we will not use
// prev_mi when encoding these frames.
+
+ buf = vp9_lookahead_peek(cpi->lookahead, 0);
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2 &&
- cpi->svc.encode_empty_frame_state == NEED_TO_ENCODE) {
+ cpi->svc.encode_empty_frame_state == NEED_TO_ENCODE &&
+ lc->rc.frames_to_key != 0 &&
+ !(buf != NULL && (buf->flags & VPX_EFLAG_FORCE_KF))) {
if ((cpi->svc.number_temporal_layers > 1 &&
cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1) ||
(cpi->svc.number_spatial_layers > 1 &&
VP9_COMMON_SRCS-yes += common/vp9_enums.h
VP9_COMMON_SRCS-yes += common/vp9_idct.h
VP9_COMMON_SRCS-yes += common/vp9_loopfilter.h
+VP9_COMMON_SRCS-yes += common/vp9_loopfilter_thread.h
VP9_COMMON_SRCS-yes += common/vp9_mv.h
VP9_COMMON_SRCS-yes += common/vp9_onyxc_int.h
VP9_COMMON_SRCS-yes += common/vp9_pred_common.h
VP9_COMMON_SRCS-yes += common/vp9_tile_common.c
VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c
VP9_COMMON_SRCS-yes += common/vp9_loopfilter_filters.c
+VP9_COMMON_SRCS-yes += common/vp9_loopfilter_thread.c
VP9_COMMON_SRCS-yes += common/vp9_mvref_common.c
VP9_COMMON_SRCS-yes += common/vp9_mvref_common.h
VP9_COMMON_SRCS-yes += common/vp9_quant_common.c
unsigned int frame_periodic_boost;
vpx_bit_depth_t bit_depth;
vp9e_tune_content content;
+ vpx_color_space_t color_space;
};
static struct vp9_extracfg default_extra_cfg = {
NO_AQ, // aq_mode
0, // frame_periodic_delta_q
VPX_BITS_8, // Bit depth
- VP9E_CONTENT_DEFAULT // content
+ VP9E_CONTENT_DEFAULT, // content
+ VPX_CS_UNKNOWN, // color space
};
struct vpx_codec_alg_priv {
size_t pending_frame_sizes[8];
size_t pending_frame_magnitude;
vpx_image_t preview_img;
+ vpx_enc_frame_flags_t next_frame_flags;
vp8_postproc_cfg_t preview_ppcfg;
vpx_codec_pkt_list_decl(256) pkt_list;
unsigned int fixed_kf_cntr;
cfg->g_bit_depth == VPX_BITS_8) {
ERROR("Codec bit-depth 8 not supported in profile > 1");
}
-
+ RANGE_CHECK(extra_cfg, color_space, VPX_CS_UNKNOWN, VPX_CS_SRGB);
return VPX_CODEC_OK;
}
oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in;
#endif
+ oxcf->color_space = extra_cfg->color_space;
oxcf->arnr_max_frames = extra_cfg->arnr_max_frames;
oxcf->arnr_strength = extra_cfg->arnr_strength;
static vpx_codec_err_t encoder_set_config(vpx_codec_alg_priv_t *ctx,
const vpx_codec_enc_cfg_t *cfg) {
vpx_codec_err_t res;
-
- if (cfg->g_w != ctx->cfg.g_w || cfg->g_h != ctx->cfg.g_h)
- ERROR("Cannot change width or height after initialization");
+ int force_key = 0;
+
+ if (cfg->g_w != ctx->cfg.g_w || cfg->g_h != ctx->cfg.g_h) {
+ if (cfg->g_lag_in_frames > 1 || cfg->g_pass != VPX_RC_ONE_PASS)
+ ERROR("Cannot change width or height after initialization");
+ if ((ctx->cpi->initial_width && (int)cfg->g_w > ctx->cpi->initial_width) ||
+ (ctx->cpi->initial_height && (int)cfg->g_h > ctx->cpi->initial_height))
+ ERROR("Cannot increase width or height larger than their initial values");
+ if (!valid_ref_frame_size(ctx->cfg.g_w, ctx->cfg.g_h, cfg->g_w, cfg->g_h))
+ force_key = 1;
+ }
// Prevent increasing lag_in_frames. This check is stricter than it needs
// to be -- the limit is not increasing past the first lag_in_frames
vp9_change_config(ctx->cpi, &ctx->oxcf);
}
+ if (force_key)
+ ctx->next_frame_flags |= VPX_EFLAG_FORCE_KF;
+
return res;
}
// Store the original flags in to the frame buffer. Will extract the
// key frame flag when we actually encode this frame.
- if (vp9_receive_raw_frame(cpi, flags,
+ if (vp9_receive_raw_frame(cpi, flags | ctx->next_frame_flags,
&sd, dst_time_stamp, dst_end_time_stamp)) {
res = update_error_state(ctx, &cpi->common.error);
}
+ ctx->next_frame_flags = 0;
}
cx_data = ctx->cx_data;
return update_extra_cfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_set_color_space(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.color_space = CAST(VP9E_SET_COLOR_SPACE, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{VP8_COPY_REFERENCE, ctrl_copy_reference},
{VP8E_UPD_ENTROPY, ctrl_update_entropy},
{VP9E_REGISTER_CX_CALLBACK, ctrl_register_cx_callback},
{VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id},
{VP9E_SET_TUNE_CONTENT, ctrl_set_tune_content},
+ {VP9E_SET_COLOR_SPACE, ctrl_set_color_space},
{VP9E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity},
// Getters
static int parse_bitdepth_colorspace_sampling(
BITSTREAM_PROFILE profile, struct vp9_read_bit_buffer *rb) {
- const int sRGB = 7;
- int colorspace;
+ vpx_color_space_t color_space;
if (profile >= PROFILE_2)
rb->bit_offset += 1; // Bit-depth 10 or 12.
- colorspace = vp9_rb_read_literal(rb, 3);
- if (colorspace != sRGB) {
+ color_space = (vpx_color_space_t)vp9_rb_read_literal(rb, 3);
+ if (color_space != VPX_CS_SRGB) {
rb->bit_offset += 1; // [16,235] (including xvycc) vs [0,255] range.
if (profile == PROFILE_1 || profile == PROFILE_3) {
rb->bit_offset += 2; // subsampling x/y.
bps = 12;
}
}
+ img->cs = yv12->color_space;
img->bit_depth = 8;
img->w = yv12->y_stride;
img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3);
yv12->y_stride = img->stride[VPX_PLANE_Y];
yv12->uv_stride = img->stride[VPX_PLANE_U];
+ yv12->color_space = img->cs;
#if CONFIG_VP9_HIGHBITDEPTH
if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_avx2.c
+VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_avg_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_sad_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_dct_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_variance_neon.c
VP9_DX_SRCS-yes += decoder/vp9_decodeframe.c
VP9_DX_SRCS-yes += decoder/vp9_decodeframe.h
VP9_DX_SRCS-yes += decoder/vp9_detokenize.c
-VP9_DX_SRCS-yes += decoder/vp9_dthread.c
-VP9_DX_SRCS-yes += decoder/vp9_dthread.h
VP9_DX_SRCS-yes += decoder/vp9_reader.h
VP9_DX_SRCS-yes += decoder/vp9_reader.c
VP9_DX_SRCS-yes += decoder/vp9_read_bit_buffer.c
#ifndef VPX_VP8CX_H_
#define VPX_VP8CX_H_
-/*!\defgroup vp8_encoder WebM VP8 Encoder
+/*!\defgroup vp8_encoder WebM VP8/VP9 Encoder
* \ingroup vp8
*
* @{
#include "./vp8.h"
/*!\file
- * \brief Provides definitions for using the VP8 encoder algorithm within the
+ * \brief Provides definitions for using VP8 or VP9 encoder algorithm within the
* vpx Codec Interface.
*/
/*!\name Algorithm interface for VP8
*
- * This interface provides the capability to encode raw VP8 streams, as would
- * be found in AVI files.
+ * This interface provides the capability to encode raw VP8 streams.
* @{
*/
extern vpx_codec_iface_t vpx_codec_vp8_cx_algo;
extern vpx_codec_iface_t *vpx_codec_vp8_cx(void);
+/*!@} - end algorithm interface member group*/
-/* TODO(jkoleszar): These move to VP9 in a later patch set. */
+/*!\name Algorithm interface for VP9
+ *
+ * This interface provides the capability to encode raw VP9 streams.
+ * @{
+ */
extern vpx_codec_iface_t vpx_codec_vp9_cx_algo;
extern vpx_codec_iface_t *vpx_codec_vp9_cx(void);
-
/*!@} - end algorithm interface member group*/
VP8E_SET_SCREEN_CONTENT_MODE, /**<control function to set encoder screen content mode */
- /* TODO(jkoleszar): Move to vp9cx.h */
+ /*!\brief Codec control function to set lossless encoding mode
+ *
+ * VP9 can operate in lossless encoding mode, in which the bitstream
+ * produced will be able to decode and reconstruct a perfect copy of
+ * input source. This control function provides a means to switch encoder
+ * into lossless coding mode (1) or normal coding mode (0) that may be lossy.
+ * 0 = lossy coding mode
+ * 1 = lossless coding mode
+ *
+ * By default, the encoder operates in normal coding mode (may be lossy).
+ */
VP9E_SET_LOSSLESS,
+
+ /*!\brief Codec control function to set number of tile columns
+ *
+ * In encoding and decoding, VP9 allows an input image frame to be partitioned
+ * into separate vertical tile columns, which can be encoded or decoded
+ * independently. This enables easy implementation of parallel encoding and
+ * decoding. This control requests the encoder to use column tiles in
+ * encoding an input frame, with number of tile columns (in Log2 unit) as
+ * the parameter:
+ * 0 = 1 tile column
+ * 1 = 2 tile columns
+ * 2 = 4 tile columns
+ * .....
+ * n = 2**n tile columns
+ * The requested tile columns will be capped by encoder based on image size
+ * limitation (The minimum width of a tile column is 256 pixels, the maximum
+ * is 4096).
+ *
+ * By default, the value is 0, i.e. one single column tile for entire image.
+ */
VP9E_SET_TILE_COLUMNS,
+
+ /*!\brief Codec control function to set number of tile rows
+ *
+ * In encoding and decoding, VP9 allows an input image frame to be partitioned
+ * into separate horizontal tile rows. Tile rows are encoded or decoded
+ * sequentially. Even though encoding/decoding of later tile rows depends on
+ * earlier ones, this allows the encoder to output data packets for tile rows
+ * prior to completely processing all tile rows in a frame, thereby reducing
+ * the latency in processing between input and output. The parameter
+ * for this control describes the number of tile rows, which has a valid
+ * range [0, 2]:
+ * 0 = 1 tile row
+ * 1 = 2 tile rows
+ * 2 = 4 tile rows
+ *
+ * By default, the value is 0, i.e. one single row tile for entire image.
+ */
VP9E_SET_TILE_ROWS,
+
+ /*!\brief Codec control function to enable frame parallel decoding feature
+ *
+ * VP9 has a bitstream feature to reduce decoding dependency between frames
+ * by turning off backward update of probability context used in encoding
+ * and decoding. This allows staged parallel processing of more than one
+ * video frame in the decoder. This control function provides a means to
+ * turn this feature on or off for bitstreams produced by the encoder.
+ *
+ * By default, this feature is off.
+ */
VP9E_SET_FRAME_PARALLEL_DECODING,
+
+ /*!\brief Codec control function to set adaptive quantization mode
+ *
+ * VP9 has a segment based feature that allows encoder to adaptively change
+ * quantization parameter for each segment within a frame to improve the
+ * subjective quality. This control makes encoder operate in one of the
+ * several AQ_modes supported.
+ *
+ * By default, encoder operates with AQ_Mode 0(adaptive quantization off).
+ */
VP9E_SET_AQ_MODE,
+
+ /*!\brief Codec control function to enable/disable periodic Q boost
+ *
+ * One VP9 encoder speed feature is to enable quality boost by lowering
+ * frame level Q periodically. This control function provides a means to
+ * turn on/off this feature.
+ * 0 = off
+ * 1 = on
+ *
+ * By default, the encoder is allowed to use this feature for appropriate
+ * encoding modes.
+ */
VP9E_SET_FRAME_PERIODIC_BOOST,
+
/*!\brief control function to set noise sensitivity
*
* 0: off, 1: OnYOnly
*/
VP9E_SET_NOISE_SENSITIVITY,
+ /*!\brief control function to turn on/off SVC in encoder.
+ * \note Return value is VPX_CODEC_INVALID_PARAM if the encoder does not
+ * support SVC in its current encoding mode
+ * 0: off, 1: on
+ */
VP9E_SET_SVC,
+
+ /*!\brief control function to set parameters for SVC.
+ * \note Parameters contain min_q, max_q, scaling factor for each of the
+ * SVC layers.
+ */
VP9E_SET_SVC_PARAMETERS,
/*!\brief control function to set svc layer for spatial and temporal.
* temporal layer.
*/
VP9E_SET_SVC_LAYER_ID,
+
+ /*!\brief control function to set content type.
+ * \note Valid parameter range:
+ * VP9E_CONTENT_DEFAULT = Regular video content (Default)
+ * VP9E_CONTENT_SCREEN = Screen capture content
+ */
VP9E_SET_TUNE_CONTENT,
+
+ /*!\brief control function to get svc layer ID.
+ * \note The layer ID returned is for the data packet from the registered
+ * callback function.
+ */
VP9E_GET_SVC_LAYER_ID,
+
+ /*!\brief control function to register callback for getting per layer packet.
+ * \note Parameter for this control function is a structure with a callback
+ * function and a pointer to private data used by the callback.
+ */
VP9E_REGISTER_CX_CALLBACK,
+
+ /*!\brief control function to set color space info.
+ * \note Valid range: 0..7, default is "UNKNOWN".
+ * 0 = UNKNOWN,
+ * 1 = BT_601
+ * 2 = BT_709
+ * 3 = SMPTE_170
+ * 4 = SMPTE_240
+ * 5 = BT_2020
+ * 6 = RESERVED
+ * 7 = SRGB
+ */
+ VP9E_SET_COLOR_SPACE,
};
/*!\brief vpx 1-D scaling mode
VPX_CTRL_USE_TYPE(VP9E_SET_NOISE_SENSITIVITY, unsigned int)
VPX_CTRL_USE_TYPE(VP9E_SET_TUNE_CONTENT, int) /* vp9e_tune_content */
+
+VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_SPACE, int)
/*! @} - end defgroup vp8_encoder */
#ifdef __cplusplus
} // extern "C"
*/
-/*!\defgroup vp8_decoder WebM VP8 Decoder
+/*!\defgroup vp8_decoder WebM VP8/VP9 Decoder
* \ingroup vp8
*
* @{
*/
/*!\file
- * \brief Provides definitions for using the VP8 algorithm within the vpx Decoder
+ * \brief Provides definitions for using VP8 or VP9 within the vpx Decoder
* interface.
*/
#ifndef VPX_VP8DX_H_
/*!\name Algorithm interface for VP8
*
- * This interface provides the capability to decode raw VP8 streams, as would
- * be found in AVI files and other non-Flash uses.
+ * This interface provides the capability to decode VP8 streams.
* @{
*/
extern vpx_codec_iface_t vpx_codec_vp8_dx_algo;
extern vpx_codec_iface_t *vpx_codec_vp8_dx(void);
+/*!@} - end algorithm interface member group*/
-/* TODO(jkoleszar): These move to VP9 in a later patch set. */
+/*!\name Algorithm interface for VP9
+ *
+ * This interface provides the capability to decode VP9 streams.
+ * @{
+ */
extern vpx_codec_iface_t vpx_codec_vp9_dx_algo;
extern vpx_codec_iface_t *vpx_codec_vp9_dx(void);
/*!@} - end algorithm interface member group*/
*/
VP9_SET_BYTE_ALIGNMENT,
- /** For testing. */
+ /** control function to invert the decoding order so that it runs from right
+ * to left. The function is used in a test to confirm the decoding
+ * independence of tile columns. The function may be used in applications
+ * where this order of decoding is desired.
+ *
+ * TODO(yaowu): Rework the unit test that uses this control, and in a future
+ * release, this test-only control shall be removed.
+ */
VP9_INVERT_TILE_DECODE_ORDER,
VP8_DECODER_CTRL_ID_MAX
/*!\brief Callback function pointer / user data pair storage */
typedef struct vpx_codec_enc_output_cx_cb_pair {
- vpx_codec_enc_output_cx_pkt_cb_fn_t output_cx_pkt;
- void *user_priv;
+ vpx_codec_enc_output_cx_pkt_cb_fn_t output_cx_pkt; /**< Callback function pointer */
+ void *user_priv; /**< Pointer to private data used by the callback */
} vpx_codec_priv_output_cx_pkt_cb_pair_t;
/*!\brief Rational Number
*
*/
typedef struct vpx_svc_parameters {
- int max_quantizers[VPX_SS_MAX_LAYERS];
- int min_quantizers[VPX_SS_MAX_LAYERS];
- int scaling_factor_num[VPX_SS_MAX_LAYERS];
- int scaling_factor_den[VPX_SS_MAX_LAYERS];
+ int max_quantizers[VPX_SS_MAX_LAYERS]; /**< Max Q for each layer */
+ int min_quantizers[VPX_SS_MAX_LAYERS]; /**< Min Q for each layer */
+ int scaling_factor_num[VPX_SS_MAX_LAYERS]; /**< Scaling factor numerator */
+ int scaling_factor_den[VPX_SS_MAX_LAYERS]; /**< Scaling factor denominator */
} vpx_svc_extra_cfg_t;
* types, removing or reassigning enums, adding/removing/rearranging
* fields to structures
*/
-#define VPX_IMAGE_ABI_VERSION (2) /**<\hideinitializer*/
+#define VPX_IMAGE_ABI_VERSION (3) /**<\hideinitializer*/
#define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */
VPX_IMG_FMT_I44016 = VPX_IMG_FMT_I440 | VPX_IMG_FMT_HIGHBITDEPTH
} vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */
+ /*!\brief List of supported color spaces
+  *
+  * The enumerators are numbered consecutively from 0 (unknown) to 7 (sRGB),
+  * so every value fits in three bits.
+  */
+ typedef enum vpx_color_space {
+ VPX_CS_UNKNOWN = 0, /**< Unknown (no color space specified) */
+ VPX_CS_BT_601 = 1, /**< BT.601 */
+ VPX_CS_BT_709 = 2, /**< BT.709 */
+ VPX_CS_SMPTE_170 = 3, /**< SMPTE.170 */
+ VPX_CS_SMPTE_240 = 4, /**< SMPTE.240 */
+ VPX_CS_BT_2020 = 5, /**< BT.2020 */
+ VPX_CS_RESERVED = 6, /**< Reserved */
+ VPX_CS_SRGB = 7 /**< sRGB */
+ } vpx_color_space_t; /**< alias for enum vpx_color_space */
+
/**\brief Image Descriptor */
typedef struct vpx_image {
vpx_img_fmt_t fmt; /**< Image Format */
+ vpx_color_space_t cs; /**< Color Space */
/* Image storage dimensions */
unsigned int w; /**< Stored image width */
#define VPX_PORTS_X86_H_
#include <stdlib.h>
#include "vpx_config.h"
+#include "vpx/vpx_integer.h"
#ifdef __cplusplus
extern "C" {
#endif
#endif /* end others */
+/* xgetbv(): executes the xgetbv instruction with ecx = 0 and returns the
+ * 64-bit result assembled as (edx << 32) | eax.
+ *
+ * The preprocessor selects one of four definitions:
+ *   - GNU-style inline asm on i386/x86_64 when not building for NaCl,
+ *   - the _xgetbv(0) intrinsic on MSVC >= VS2010 SP1 (x86 or x64),
+ *   - MSVC __asm on 32-bit x86 for older MSVC versions,
+ *   - a constant 0 everywhere else, so callers never see AVX as enabled.
+ *
+ * NaCl has no support for xgetbv or the raw opcode.
+ */
+#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
+static INLINE uint64_t xgetbv(void) {
+ const uint32_t ecx = 0;
+ uint32_t eax, edx;
+ // Use the raw opcode for xgetbv for compatibility with older toolchains.
+ __asm__ volatile (
+ ".byte 0x0f, 0x01, 0xd0\n"
+ : "=a"(eax), "=d"(edx) : "c" (ecx));
+ return ((uint64_t)edx << 32) | eax;
+}
+#elif (defined(_M_X64) || defined(_M_IX86)) && \
+ defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
+#include <immintrin.h>
+#define xgetbv() _xgetbv(0)
+#elif defined(_MSC_VER) && defined(_M_IX86)
+static INLINE uint64_t xgetbv(void) {
+ uint32_t eax_, edx_;
+ __asm {
+ xor ecx, ecx // ecx = 0
+ // Use the raw opcode for xgetbv for compatibility with older toolchains.
+ __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
+ mov eax_, eax
+ mov edx_, edx
+ }
+ return ((uint64_t)edx_ << 32) | eax_;
+}
+#else
+#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
+#endif
+
#define HAS_MMX 0x01
#define HAS_SSE 0x02
#define HAS_SSE2 0x04
if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
- if (reg_ecx & BIT(28)) flags |= HAS_AVX;
+ // AVX is usable only when the CPU reports BOTH bit 27 (OSXSAVE) and
+ // bit 28 (256-bit AVX). Testing for either bit alone is not enough:
+ // xgetbv faults unless the OS has enabled XSAVE, so it must not be
+ // executed when only the AVX bit is set.
+ if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) {
+ // The OS must have enabled both state components selected by mask 0x6
+ // before AVX (and therefore AVX2) may be reported.
+ if ((xgetbv() & 0x6) == 0x6) {
+ flags |= HAS_AVX;
- /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
- reg_eax = 7;
- reg_ecx = 0;
- cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
+ /* Get the leaf 7 feature flags. Needed to check for AVX2 support */
+ cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
- if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
+ if (reg_ebx & BIT(5)) flags |= HAS_AVX2;
+ }
+ }
return flags & mask;
}
int subsampling_x;
int subsampling_y;
unsigned int bit_depth;
+ vpx_color_space_t color_space;
int corrupted;
int flags;
VP8E_SET_MAX_INTER_BITRATE_PCT, VP8E_SET_GF_CBR_BOOST_PCT,
VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE,
VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_NOISE_SENSITIVITY,
- VP9E_SET_TUNE_CONTENT,
+ VP9E_SET_TUNE_CONTENT, VP9E_SET_COLOR_SPACE,
0
};
#endif