From: Ivan Maltz Date: Thu, 5 Sep 2013 15:55:47 +0000 (-0700) Subject: API extensions and sample app for spatial scalable encoder X-Git-Tag: v1.3.0~472^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=01b35c3c160e6158babfce046360bdc6a09d5c17;p=platform%2Fupstream%2Flibvpx.git API extensions and sample app for spatial scalable encoder Sample app: vp9_spatial_scalable_encoder vpx_codec_control extensions: VP9E_SET_SVC VP9E_SET_WIDTH, VP9E_SET_HEIGHT, VP9E_SET_LAYER VP9E_SET_MIN_Q, VP9E_SET_MAX_Q expanded buffer size for vp9_convolve modified setting of initial width in vp9_onyx_if.c so that layer size can be set prior to initial encode Default number of layers set to 3 (VPX_SS_DEFAULT_LAYERS) Number of layers set explicitly in vpx_codec_enc_cfg.ss_number_layers Change-Id: I2c7a6fe6d665113671337032f7ad032430ac4197 --- diff --git a/examples.mk b/examples.mk index 5b5ca23..c17fac9 100644 --- a/examples.mk +++ b/examples.mk @@ -49,6 +49,9 @@ vpxenc.DESCRIPTION = Full featured encoder UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder +UTILS-$(CONFIG_VP9_ENCODER) += vp9_spatial_scalable_encoder.c +vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D +vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder # Clean up old ivfenc, ivfdec binaries. 
ifeq ($(CONFIG_MSVS),yes) diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 9a7b9c5..95baf9b 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -1272,7 +1272,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = 1, /* g_delete_first_pass_file */ "vp8.fpf" /* first pass filename */ #endif - + VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */ 1, /* ts_number_layers */ {0}, /* ts_target_bitrate */ {0}, /* ts_rate_decimator */ diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c index be092f4..94231a1 100644 --- a/vp9/common/vp9_convolve.c +++ b/vp9/common/vp9_convolve.c @@ -191,17 +191,21 @@ static void convolve_c(const uint8_t *src, ptrdiff_t src_stride, const int16_t *filter_y, int y_step_q4, int w, int h, int taps) { /* Fixed size intermediate buffer places limits on parameters. - * Maximum intermediate_height is 135, for y_step_q4 == 32, + * Maximum intermediate_height is 324, for y_step_q4 == 80, * h == 64, taps == 8. + * y_step_q4 of 80 allows for 1/10 scale for 5 layer svc */ - uint8_t temp[64 * 135]; + uint8_t temp[64 * 324]; int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps; assert(w <= 64); assert(h <= 64); assert(taps <= 8); - assert(y_step_q4 <= 32); - assert(x_step_q4 <= 32); + assert(y_step_q4 <= 80); + assert(x_step_q4 <= 80); + + if (intermediate_height < h) + intermediate_height = h; convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64, filter_x, x_step_q4, filter_y, y_step_q4, w, diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h index 152046f..b237f9e 100644 --- a/vp9/common/vp9_onyx.h +++ b/vp9/common/vp9_onyx.h @@ -130,6 +130,8 @@ extern "C" // END DATARATE CONTROL OPTIONS // ---------------------------------------------------------------- + // Spatial scalability + int ss_number_layers; // these parameters aren't to be used in final build don't use!!! 
int play_alternate; @@ -210,6 +212,13 @@ extern "C" int vp9_set_internal_size(VP9_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert_mode); + int vp9_set_size_literal(VP9_PTR comp, unsigned int width, + unsigned int height); + + int vp9_switch_layer(VP9_PTR comp, int layer); + + void vp9_set_svc(VP9_PTR comp, int use_svc); + int vp9_get_quantizer(VP9_PTR c); #ifdef __cplusplus diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index f3bbc17..3f2844c 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1208,7 +1208,8 @@ static int get_refresh_mask(VP9_COMP *cpi) { if (!cpi->multi_arf_enabled && cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { #else - if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { + if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame && + !cpi->use_svc) { #endif // Preserve the previously existing golden frame and update the frame in // the alt ref slot instead. This is highly specific to the use of @@ -1320,9 +1321,16 @@ static void write_frame_size_with_refs(VP9_COMP *cpi, YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->ref_frame_map[refs[i]]]; found = cm->width == cfg->y_crop_width && cm->height == cfg->y_crop_height; + + // TODO(ivan): This prevents a bug while more than 3 buffers are used. Do it + // in a better way. 
+ if (cpi->use_svc) { + found = 0; + } vp9_wb_write_bit(wb, found); - if (found) + if (found) { break; + } } if (!found) { diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index e9c214f..5f2f2ba 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1188,6 +1188,9 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->gld_fb_idx = 1; cpi->alt_fb_idx = 2; + cpi->current_layer = 0; + cpi->use_svc = 0; + set_tile_limits(cpi); cpi->fixed_divide[0] = 0; @@ -1458,6 +1461,9 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->alt_is_last = 0; cpi->gold_is_alt = 0; + // Spatial scalability + cpi->number_spatial_layers = oxcf->ss_number_layers; + // Create the encoder segmentation map and set all entries to 0 CHECK_MEM_ERROR(cm, cpi->segmentation_map, vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); @@ -2262,6 +2268,12 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { cpi->oxcf.play_alternate && !cpi->refresh_alt_ref_frame) { cpi->source_alt_ref_pending = 1; cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; + + // TODO(ivan): for SVC encoder, GF automatic update is disabled by using a + // large GF_interval + if (cpi->use_svc) { + cpi->frames_till_gf_update_due = INT_MAX; + } } if (!cpi->source_alt_ref_pending) @@ -2427,7 +2439,8 @@ static void update_reference_frames(VP9_COMP * const cpi) { else if (!cpi->multi_arf_enabled && cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { #else - else if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { + else if (cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame && + !cpi->use_svc) { #endif /* Preserve the previously existing golden frame and update the frame in * the alt ref slot instead. 
This is highly specific to the current use of @@ -2500,9 +2513,11 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { static void scale_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; int i; + int refs[ALLOWED_REFS_PER_FRAME] = {cpi->lst_fb_idx, cpi->gld_fb_idx, + cpi->alt_fb_idx}; for (i = 0; i < 3; i++) { - YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[cm->ref_frame_map[i]]; + YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[cm->ref_frame_map[refs[i]]]; if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { @@ -2515,8 +2530,8 @@ static void scale_references(VP9_COMP *cpi) { scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]); cpi->scaled_ref_idx[i] = new_fb; } else { - cpi->scaled_ref_idx[i] = cm->ref_frame_map[i]; - cm->fb_idx_ref_cnt[cm->ref_frame_map[i]]++; + cpi->scaled_ref_idx[i] = cm->ref_frame_map[refs[i]]; + cm->fb_idx_ref_cnt[cm->ref_frame_map[refs[i]]]++; } } } @@ -3586,24 +3601,28 @@ static void Pass2Encode(VP9_COMP *cpi, unsigned long *size, } } - -int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, - YV12_BUFFER_CONFIG *sd, int64_t time_stamp, - int64_t end_time) { - VP9_COMP *cpi = (VP9_COMP *) ptr; +static void check_initial_width(VP9_COMP *cpi, YV12_BUFFER_CONFIG *sd) { VP9_COMMON *cm = &cpi->common; - struct vpx_usec_timer timer; - int res = 0; - if (!cpi->initial_width) { // TODO(jkoleszar): Support 1/4 subsampling? 
- cm->subsampling_x = sd->uv_width < sd->y_width; - cm->subsampling_y = sd->uv_height < sd->y_height; + cm->subsampling_x = (sd != NULL) && sd->uv_width < sd->y_width; + cm->subsampling_y = (sd != NULL) && sd->uv_height < sd->y_height; alloc_raw_frame_buffers(cpi); cpi->initial_width = cm->width; cpi->initial_height = cm->height; } +} + + +int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, + YV12_BUFFER_CONFIG *sd, int64_t time_stamp, + int64_t end_time) { + VP9_COMP *cpi = (VP9_COMP *) ptr; + struct vpx_usec_timer timer; + int res = 0; + + check_initial_width(cpi, sd); vpx_usec_timer_start(&timer); if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags, cpi->active_map_enabled ? cpi->active_map : NULL)) @@ -4141,7 +4160,76 @@ int vp9_set_internal_size(VP9_PTR comp, return 0; } +int vp9_set_size_literal(VP9_PTR comp, unsigned int width, + unsigned int height) { + VP9_COMP *cpi = (VP9_COMP *)comp; + VP9_COMMON *cm = &cpi->common; + + check_initial_width(cpi, NULL); + + if (width) { + cm->width = width; + if (cm->width * 5 < cpi->initial_width) { + cm->width = cpi->initial_width / 5 + 1; + printf("Warning: Desired width too small, changed to %d \n", cm->width); + } + if (cm->width > cpi->initial_width) { + cm->width = cpi->initial_width; + printf("Warning: Desired width too large, changed to %d \n", cm->width); + } + } + + if (height) { + cm->height = height; + if (cm->height * 5 < cpi->initial_height) { + cm->height = cpi->initial_height / 5 + 1; + printf("Warning: Desired height too small, changed to %d \n", cm->height); + } + if (cm->height > cpi->initial_height) { + cm->height = cpi->initial_height; + printf("Warning: Desired height too large, changed to %d \n", cm->height); + } + } + + assert(cm->width <= cpi->initial_width); + assert(cm->height <= cpi->initial_height); + update_frame_size(cpi); + return 0; +} + +int vp9_switch_layer(VP9_PTR comp, int layer) { + VP9_COMP *cpi = (VP9_COMP *)comp; + + if (cpi->use_svc) { + 
cpi->current_layer = layer; + + // Use buffer i for layer i LST + cpi->lst_fb_idx = layer; + + // Use buffer i-1 for layer i Alt (Inter-layer prediction) + if (layer != 0) cpi->alt_fb_idx = layer - 1; + + // Use the rest for Golden + if (layer < 2 * cpi->number_spatial_layers - NUM_REF_FRAMES) + cpi->gld_fb_idx = cpi->lst_fb_idx; + else + cpi->gld_fb_idx = 2 * cpi->number_spatial_layers - 1 - layer; + + printf("Switching to layer %d:\n", layer); + printf("Using references: LST/GLD/ALT [%d|%d|%d]\n", cpi->lst_fb_idx, + cpi->gld_fb_idx, cpi->alt_fb_idx); + } else { + printf("Switching layer not supported. Enable SVC first \n"); + } + return 0; +} +void vp9_set_svc(VP9_PTR comp, int use_svc) { + VP9_COMP *cpi = (VP9_COMP *)comp; + cpi->use_svc = use_svc; + if (cpi->use_svc) printf("Enabled SVC encoder \n"); + return; +} int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) { int i, j; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 252e982..2dbd7a0 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -351,6 +351,10 @@ typedef struct VP9_COMP { int lst_fb_idx; int gld_fb_idx; int alt_fb_idx; + + int current_layer; + int use_svc; + #if CONFIG_MULTIPLE_ARF int alt_ref_fb_idx[NUM_REF_FRAMES - 3]; #endif @@ -650,6 +654,8 @@ typedef struct VP9_COMP { int initial_width; int initial_height; + int number_spatial_layers; + #if CONFIG_MULTIPLE_ARF // ARF tracking variables. 
int multi_arf_enabled; @@ -684,6 +690,17 @@ static int get_ref_frame_idx(VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { } } +static int get_scale_ref_frame_idx(VP9_COMP *cpi, + MV_REFERENCE_FRAME ref_frame) { + if (ref_frame == LAST_FRAME) { + return 0; + } else if (ref_frame == GOLDEN_FRAME) { + return 1; + } else { + return 2; + } +} + void vp9_encode_frame(VP9_COMP *cpi); void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 39b6544..6c67d55 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2278,8 +2278,9 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) { YV12_BUFFER_CONFIG *scaled_ref_frame = NULL; int fb = get_ref_frame_idx(cpi, ref_frame); - if (cpi->scaled_ref_idx[fb] != cpi->common.ref_frame_map[fb]) - scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb]]; + int fb_scale = get_scale_ref_frame_idx(cpi, ref_frame); + if (cpi->scaled_ref_idx[fb_scale] != cpi->common.ref_frame_map[fb]) + scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb_scale]]; return scaled_ref_frame; } diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 0874afd..3198394 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -160,6 +160,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100); RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); + RANGE_CHECK(cfg, ss_number_layers, 1, + VPX_SS_MAX_LAYERS); /*Spatial layers max */ /* VP8 does not support a lower bound on the keyframe interval in * automatic keyframe placement mode. 
*/ @@ -317,6 +319,8 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, oxcf->error_resilient_mode = cfg.g_error_resilient; oxcf->frame_parallel_decoding_mode = vp8_cfg.frame_parallel_decoding_mode; + + oxcf->ss_number_layers = cfg.ss_number_layers; /* printf("Current VP9 Settings: \n"); printf("target_bandwidth: %d\n", oxcf->target_bandwidth); @@ -423,6 +427,8 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, MAP(VP8E_SET_ARNR_TYPE, xcfg.arnr_type); MAP(VP8E_SET_TUNING, xcfg.tuning); MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level); + MAP(VP9E_SET_MAX_Q, ctx->cfg.rc_max_quantizer); + MAP(VP9E_SET_MIN_Q, ctx->cfg.rc_min_quantizer); MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, xcfg.rc_max_intra_bitrate_pct); MAP(VP9E_SET_LOSSLESS, xcfg.lossless); MAP(VP9E_SET_FRAME_PARALLEL_DECODING, xcfg.frame_parallel_decoding_mode); @@ -1004,6 +1010,68 @@ static vpx_codec_err_t vp9e_set_scalemode(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_INVALID_PARAM; } +static vpx_codec_err_t vp9e_set_width(vpx_codec_alg_priv_t *ctx, int ctr_id, + va_list args) { + unsigned int *data = va_arg(args, unsigned int *); + if (data) { + int res; + res = vp9_set_size_literal(ctx->cpi, *data, 0); + if (!res) { + return VPX_CODEC_OK; + } else { + return VPX_CODEC_INVALID_PARAM; + } + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + +static vpx_codec_err_t vp9e_set_height(vpx_codec_alg_priv_t *ctx, + int ctr_id, + va_list args) { + unsigned int *data = va_arg(args, unsigned int *); + + if (data) { + int res; + res = vp9_set_size_literal(ctx->cpi, 0, *data); + + if (!res) { + return VPX_CODEC_OK; + } else { + return VPX_CODEC_INVALID_PARAM; + } + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + +static vpx_codec_err_t vp9e_set_layer(vpx_codec_alg_priv_t *ctx, + int ctr_id, + va_list args) { + unsigned int *data = va_arg(args, unsigned int *); + + if (data) { + int res; + res = 0; + + res = vp9_switch_layer(ctx->cpi, *data); + + if (!res) { + return VPX_CODEC_OK; + } else { + return 
VPX_CODEC_INVALID_PARAM; + } + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + +static vpx_codec_err_t vp9e_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id, + va_list args) { + int data = va_arg(args, int); + vp9_set_svc(ctx->cpi, data); + return VPX_CODEC_OK; +} static vpx_codec_ctrl_fn_map_t vp9e_ctf_maps[] = { {VP8_SET_REFERENCE, vp9e_set_reference}, @@ -1029,10 +1097,16 @@ static vpx_codec_ctrl_fn_map_t vp9e_ctf_maps[] = { {VP8E_SET_ARNR_TYPE, set_param}, {VP8E_SET_TUNING, set_param}, {VP8E_SET_CQ_LEVEL, set_param}, + {VP9E_SET_MAX_Q, set_param}, + {VP9E_SET_MIN_Q, set_param}, {VP8E_SET_MAX_INTRA_BITRATE_PCT, set_param}, {VP9E_SET_LOSSLESS, set_param}, {VP9E_SET_FRAME_PARALLEL_DECODING, set_param}, {VP9_GET_REFERENCE, get_reference}, + {VP9E_SET_WIDTH, vp9e_set_width}, + {VP9E_SET_HEIGHT, vp9e_set_height}, + {VP9E_SET_LAYER, vp9e_set_layer}, + {VP9E_SET_SVC, vp9e_set_svc}, { -1, NULL}, }; @@ -1082,6 +1156,8 @@ static vpx_codec_enc_cfg_map_t vp9e_usage_cfg_map[] = { 0, /* kf_min_dist */ 9999, /* kf_max_dist */ + VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */ + #if VPX_ENCODER_ABI_VERSION == (1 + VPX_CODEC_ABI_VERSION) 1, /* g_delete_first_pass_file */ "vp8.fpf" /* first pass filename */ diff --git a/vp9_spatial_scalable_encoder.c b/vp9_spatial_scalable_encoder.c new file mode 100644 index 0000000..8bb582f --- /dev/null +++ b/vp9_spatial_scalable_encoder.c @@ -0,0 +1,487 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/* + * This is an example demonstrating how to implement a multi-layer + * VP9 encoding scheme based on spatial scalability for video applications + * that benefit from a scalable bitstream. + */ +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <time.h> +#include <string.h> +#include <unistd.h> +#include <libgen.h> +#define VPX_CODEC_DISABLE_COMPAT 1 +#include "vpx/vpx_encoder.h" +#include "vpx/vp8cx.h" +#define interface (vpx_codec_vp9_cx()) +#define fourcc 0x30395056 +#define IVF_FILE_HDR_SZ (32) +#define IVF_FRAME_HDR_SZ (12) +#define NUM_BUFFERS 8 + +char *input_filename; +char *output_filename; +unsigned int number_frames_to_code = 60 * 60; +unsigned int number_frames_to_skip = 0; +unsigned int number_spatial_layers = 5; +unsigned int key_period = 100; + +typedef enum ENCODING_MODE { + INTER_LAYER_PREDICTION_I, + INTER_LAYER_PREDICTION_IP, + USE_GOLDEN_FRAME +} ENCODING_MODE; + +static void mem_put_le16(char *mem, unsigned int val) { + mem[0] = val; + mem[1] = val >> 8; +} + +static void mem_put_le32(char *mem, unsigned int val) { + mem[0] = val; + mem[1] = val >> 8; + mem[2] = val >> 16; + mem[3] = val >> 24; +} + +static void usage(char *program_name) { + printf( + "Usage: %s [-f frames] [-s skip_frames] [-w width] [-h height] \n\t" + "[-n rate_num] [-d rate_den] [-b bitrate] [-l layers] " + "<input_filename> <output_filename> \n", + basename(program_name)); + exit(EXIT_FAILURE); +} + +static void die(const char *fmt, ...) 
{ + va_list ap; + + va_start(ap, fmt); + vprintf(fmt, ap); + if (fmt[strlen(fmt) - 1] != '\n') printf("\n"); + exit(EXIT_FAILURE); +} + +static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { + const char *detail = vpx_codec_error_detail(ctx); + + printf("%s: %s\n", s, vpx_codec_error(ctx)); + if (detail) printf(" %s\n", detail); + exit(EXIT_FAILURE); +} + +static int read_frame(FILE *f, vpx_image_t *img) { + size_t nbytes, to_read; + int res = 1; + + to_read = img->w * img->h * 3 / 2; + nbytes = fread(img->planes[0], 1, to_read, f); + if (nbytes != to_read) { + res = 0; + if (nbytes > 0) + printf("Warning: Read partial frame. Check your width & height!\n"); + } + return res; +} + +static int read_dummy_frame(vpx_image_t *img) { + size_t to_read; + + to_read = img->w * img->h * 3 / 2; + memset(img->planes[0], 129, to_read); + return 1; +} + +static void write_ivf_file_header(FILE *outfile, const vpx_codec_enc_cfg_t *cfg, + int frame_cnt) { + char header[32]; + + if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) return; + header[0] = 'D'; + header[1] = 'K'; + header[2] = 'I'; + header[3] = 'F'; + mem_put_le16(header + 4, 0); /* version */ + mem_put_le16(header + 6, 32); /* headersize */ + mem_put_le32(header + 8, fourcc); /* headersize */ + mem_put_le16(header + 12, cfg->g_w); /* width */ + mem_put_le16(header + 14, cfg->g_h); /* height */ + mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */ + mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */ + mem_put_le32(header + 24, frame_cnt); /* length */ + mem_put_le32(header + 28, 0); /* unused */ + + (void)fwrite(header, 1, 32, outfile); +} + +static void write_ivf_frame_header(FILE *outfile, + const vpx_codec_cx_pkt_t *pkt) { + char header[12]; + vpx_codec_pts_t pts; + + if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return; + + pts = pkt->data.frame.pts; + mem_put_le32(header, pkt->data.frame.sz); + mem_put_le32(header + 4, pts & 0xFFFFFFFF); + mem_put_le32(header + 8, pts >> 32); + 
+ (void)fwrite(header, 1, 12, outfile); +} + +static void check_parameters() { + if (number_spatial_layers > 5) die("Cannot support more than 5 layers"); +} + +static void parse_command_line(int argc, char **argv, + vpx_codec_enc_cfg_t *cfg) { + unsigned int width = 1920; + unsigned int height = 1080; + unsigned int timebase_num = 1; + unsigned int timebase_den = 60; + unsigned int bitrate = 1000; + int c; + vpx_codec_err_t res; + + opterr = 0; + while ((c = getopt(argc, argv, "f:w:h:n:d:b:s:l:p:")) != -1) switch (c) { + case 'f': + number_frames_to_code = atoi(optarg); + break; + case 'w': + width = atoi(optarg); + break; + case 'h': + height = atoi(optarg); + break; + case 'n': + timebase_num = atoi(optarg); + break; + case 'd': + timebase_den = atoi(optarg); + break; + case 'b': + bitrate = atoi(optarg); + break; + case 's': + number_frames_to_skip = atoi(optarg); + break; + case 'l': + number_spatial_layers = atoi(optarg); + break; + case 'p': + key_period = atoi(optarg); + break; + case '?': + usage(argv[0]); + } + + // Parse required parameters + if (argc - optind != 2) { + usage(argv[0]); + } + + input_filename = argv[optind]; + output_filename = argv[optind + 1]; + + if (width < 16 || width % 2 || height < 16 || height % 2) + die("Invalid resolution: %d x %d", width, height); + + /* Populate encoder configuration */ + res = vpx_codec_enc_config_default(interface, cfg, 0); + if (res) { + die("Failed to get config: %s\n", vpx_codec_err_to_string(res)); + } + printf( + "Codec %s\nframes: %d, skip: %d, layers: %d\n" + "width %d, height: %d, \n" + "num: %d, den: %d, bitrate: %d, \n" + "key period: %d \n", + vpx_codec_iface_name(interface), number_frames_to_code, + number_frames_to_skip, number_spatial_layers, width, height, timebase_num, + timebase_den, bitrate, key_period); + + // Do minimal check at the application level. 
Encoder parameters will be + // checked internally + check_parameters(); + + cfg->rc_target_bitrate = bitrate; + cfg->g_w = width; + cfg->g_h = height; + cfg->g_timebase.num = timebase_num; + cfg->g_timebase.den = timebase_den; + cfg->ss_number_layers = number_spatial_layers; +} + +static void set_default_configuration(vpx_codec_enc_cfg_t *cfg) { + /* Real time parameters */ + cfg->rc_dropframe_thresh = 0; + cfg->rc_end_usage = VPX_CBR; + cfg->rc_resize_allowed = 0; + cfg->rc_min_quantizer = 33; + cfg->rc_max_quantizer = 33; + cfg->rc_undershoot_pct = 100; + cfg->rc_overshoot_pct = 15; + cfg->rc_buf_initial_sz = 500; + cfg->rc_buf_optimal_sz = 600; + cfg->rc_buf_sz = 1000; + + /* Enable error resilient mode */ + cfg->g_error_resilient = 1; + cfg->g_lag_in_frames = 0; + + /* Disable automatic keyframe placement */ + cfg->kf_mode = VPX_KF_DISABLED; + cfg->kf_min_dist = cfg->kf_max_dist = 3000; +} + +static void initialize_codec(vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *cfg) { + int max_intra_size_pct; + + /* Initialize codec */ + if (vpx_codec_enc_init(codec, interface, cfg, VPX_CODEC_USE_PSNR)) + die_codec(codec, "Failed to initialize encoder"); + + vpx_codec_control(codec, VP9E_SET_SVC, 1); + /* Cap CPU & first I-frame size */ + vpx_codec_control(codec, VP8E_SET_CPUUSED, 1); + vpx_codec_control(codec, VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(codec, VP8E_SET_NOISE_SENSITIVITY, 1); + vpx_codec_control(codec, VP8E_SET_TOKEN_PARTITIONS, 1); + + max_intra_size_pct = + (int)(((double)cfg->rc_buf_optimal_sz * 0.5) * + ((double)cfg->g_timebase.den / cfg->g_timebase.num) / 10.0); + /* printf ("max_intra_size_pct=%d\n", max_intra_size_pct); */ + + vpx_codec_control(codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct); +} + +static int calculate_layer(int frame_cnt, int number_spatial_layers) { + if (frame_cnt == 0) + return 0; + else + return (frame_cnt + number_spatial_layers - 1) % number_spatial_layers; +} + +static void switch_to_layer(int layer, 
unsigned int initial_width, + unsigned int initial_height, + vpx_codec_ctx_t *codec) { + // Set layer size + int scaling_factor_num[MAX_LAYERS] = {2, 1, 4, 2, 1}; + int scaling_factor_den[MAX_LAYERS] = {9, 3, 9, 3, 1}; + + int quantizer[MAX_LAYERS] = {60, 53, 39, 33, 27}; + + unsigned int current_width; + unsigned int current_height; + + current_width = initial_width * + scaling_factor_num[layer + 5 - number_spatial_layers] / + scaling_factor_den[layer + 5 - number_spatial_layers]; + current_height = initial_height * + scaling_factor_num[layer + 5 - number_spatial_layers] / + scaling_factor_den[layer + 5 - number_spatial_layers]; + + current_width += current_width % 2; + current_height += current_height % 2; + + vpx_codec_control(codec, VP9E_SET_WIDTH, &current_width); + vpx_codec_control(codec, VP9E_SET_HEIGHT, &current_height); + + // Set layer context + vpx_codec_control(codec, VP9E_SET_LAYER, &layer); + vpx_codec_control(codec, VP9E_SET_MAX_Q, + quantizer[layer + 5 - number_spatial_layers]); + vpx_codec_control(codec, VP9E_SET_MIN_Q, + quantizer[layer + 5 - number_spatial_layers]); +} + +static int get_flag(int is_I_frame_in_layer, int layer, ENCODING_MODE mode) { + // First layer + switch (mode) { + case INTER_LAYER_PREDICTION_I: + if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF; + if (layer == 0) + return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; + else if (is_I_frame_in_layer) + return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST; + else + return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; + break; + + case INTER_LAYER_PREDICTION_IP: + if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF; + if (layer == 0) + return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; + else if (is_I_frame_in_layer) + return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + 
VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST; + else + return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; + break; + + case USE_GOLDEN_FRAME: + if (is_I_frame_in_layer && layer == 0) return VPX_EFLAG_FORCE_KF; + if (2 * number_spatial_layers - NUM_BUFFERS <= layer) { + if (layer == 0) + return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_ARF; + else if (is_I_frame_in_layer) + return VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_LAST; + else + return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + } else { + if (layer == 0) + return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; + else if (is_I_frame_in_layer) + return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_LAST; + else + return VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; + } + break; + default: + return VPX_EFLAG_FORCE_KF; + } +} + +int main(int argc, char **argv) { + FILE *infile, *outfile[MAX_LAYERS]; + vpx_codec_ctx_t codec; + vpx_codec_enc_cfg_t cfg; + int frame_cnt = 0; + vpx_image_t raw; + int frame_avail = 1; + int got_data = 0; + int i; + int frames_in_layer[MAX_LAYERS] = {0}; + clock_t before; + clock_t after; + int pts = 0; /* PTS starts at 0 */ + int frame_duration = 1; /* 1 timebase tick per frame */ + + parse_command_line(argc, argv, &cfg); + + // Allocate image buffer + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, cfg.g_w, cfg.g_h, 32)) + die("Failed to allocate image", cfg.g_w, cfg.g_h); + + set_default_configuration(&cfg); + + /* Open input file */ + if (!(infile = fopen(input_filename, "rb"))) + die("Failed to open %s for reading", argv[1]); + + /* Open output file */ + for (i = 0; i < number_spatial_layers; i++) { + char file_name[512]; + snprintf(file_name, sizeof(file_name), "%s_%d.ivf", output_filename, i); + if (!(outfile[i] = fopen(file_name, "wb"))) + die("Failed to open %s for writing", file_name); + 
write_ivf_file_header(outfile[i], &cfg, 0); + } + + initialize_codec(&codec, &cfg); + + // skip initial frames + for (i = 0; i < number_frames_to_skip; i++) { + read_frame(infile, &raw); + } + + before = clock(); + // Encoding frames + while ((frame_avail || got_data) && + frame_cnt <= number_frames_to_code * number_spatial_layers) { + int flags = 0; + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt; + + int layer = calculate_layer(frame_cnt, number_spatial_layers); + int is_I_frame_in_layer = + (((frame_cnt - 1) / number_spatial_layers % key_period) == 0); + int is_dummy = (frame_cnt == 0); + + if (is_dummy) { // Dummy frame + flags = VPX_EFLAG_FORCE_KF; + frame_avail = read_dummy_frame(&raw); + + } else { // Regular frame + // Read a new frame only at the base layer + if (layer == 0) frame_avail = read_frame(infile, &raw); + switch_to_layer(layer, cfg.g_w, cfg.g_h, &codec); + flags = get_flag(is_I_frame_in_layer, layer, INTER_LAYER_PREDICTION_I); + } + + // Actual Encoding + if (vpx_codec_encode(&codec, frame_avail ? 
&raw : NULL, pts, 1, flags, + VPX_DL_REALTIME)) + die_codec(&codec, "Failed to encode frame"); + + got_data = 0; + // Process data / Get PSNR statistics + while ((pkt = vpx_codec_get_cx_data(&codec, &iter))) { + got_data = 1; + switch (pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: + for (i = layer; i < number_spatial_layers; i++) { + write_ivf_frame_header(outfile[i], pkt); + (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, + outfile[i]); + frames_in_layer[i]++; + } + break; + case VPX_CODEC_PSNR_PKT: + if (frame_cnt != 0) + printf( + "Processed Frame %d, layer %d, PSNR(Total/Y/U/V): " + "%2.3f %2.3f %2.3f %2.3f \n", + (frame_cnt - 1) / number_spatial_layers + 1, layer, + pkt->data.psnr.psnr[0], pkt->data.psnr.psnr[1], + pkt->data.psnr.psnr[2], pkt->data.psnr.psnr[3]); + break; + default: + break; + } + } + frame_cnt++; + // TODO(ivan): Modify ts later if(!layer) + pts += frame_duration; + } + // end while + + after = clock(); + printf("Processed %d frames in different resolutions in %ld ms.\n", + frame_cnt - 1, (int)(after - before) / (CLOCKS_PER_SEC / 1000)); + + fclose(infile); + + if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + + /* Try to rewrite the output file headers with the actual frame count */ + for (i = 0; i < number_spatial_layers; i++) { + if (!fseek(outfile[i], 0, SEEK_SET)) { + write_ivf_file_header(outfile[i], &cfg, frames_in_layer[i]); + } + fclose(outfile[i]); + } + + return EXIT_SUCCESS; +} diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index f8e2ef9..f3ea6d3 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -190,7 +190,15 @@ enum vp8e_enc_control_id { VP9E_SET_LOSSLESS, VP9E_SET_TILE_COLUMNS, VP9E_SET_TILE_ROWS, - VP9E_SET_FRAME_PARALLEL_DECODING + VP9E_SET_FRAME_PARALLEL_DECODING, + + VP9E_SET_WIDTH = 99, + VP9E_SET_HEIGHT, + VP9E_SET_LAYER, + VP9E_SET_SVC, + + VP9E_SET_MAX_Q, + VP9E_SET_MIN_Q }; /*!\brief vpx 1-D scaling mode @@ -292,6 +300,12 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ROI_MAP, vpx_roi_map_t *) 
VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *) VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *) +VPX_CTRL_USE_TYPE(VP9E_SET_LAYER, int *) +VPX_CTRL_USE_TYPE(VP9E_SET_SVC, int) + +VPX_CTRL_USE_TYPE(VP9E_SET_WIDTH, unsigned int *) +VPX_CTRL_USE_TYPE(VP9E_SET_HEIGHT, unsigned int *) + VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED, int) VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int) VPX_CTRL_USE_TYPE(VP8E_SET_NOISE_SENSITIVITY, unsigned int) @@ -316,6 +330,9 @@ VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int) VPX_CTRL_USE_TYPE(VP9E_SET_LOSSLESS, unsigned int) VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int) + +VPX_CTRL_USE_TYPE(VP9E_SET_MAX_Q, unsigned int) +VPX_CTRL_USE_TYPE(VP9E_SET_MIN_Q, unsigned int) /*! @} - end defgroup vp8_encoder */ #include "vpx_codec_impl_bottom.h" #endif diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h index ffdbc06..181b92a 100644 --- a/vpx/vpx_encoder.h +++ b/vpx/vpx_encoder.h @@ -46,6 +46,12 @@ extern "C" { /*!\deprecated Use #VPX_TS_MAX_LAYERS instead. */ #define MAX_LAYERS VPX_TS_MAX_LAYERS +/*! Spatial Scalability: Maximum number of coding layers */ +#define VPX_SS_MAX_LAYERS 5 + +/*! Spatial Scalability: Default number of coding layers */ +#define VPX_SS_DEFAULT_LAYERS 3 + /*!\brief Current ABI version number * * \internal @@ -595,8 +601,14 @@ extern "C" { unsigned int kf_max_dist; /* - * Temporal scalability settings (ts) + * Spatial scalability settings (ss) + */ + + /*!\brief Number of coding layers (spatial) + * + * This value specifies the number of coding layers to be used. */ + unsigned int ss_number_layers; /*!\brief Number of coding layers *