2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
13 * This is an example demonstrating how to implement a multi-layer VP8
14 * encoding scheme based on temporal scalability for video applications
15 * that benefit from a scalable bitstream.
21 #define VPX_CODEC_DISABLE_COMPAT 1
22 #include "vpx/vpx_encoder.h"
23 #include "vpx/vp8cx.h"
/* Codec interface used by every encoder call in this example: whatever
 * vpx_codec_vp8_cx() returns. */
24 #define interface (vpx_codec_vp8_cx())
/* Codec tag stored at offset 8 of the IVF file header. Written
 * least-significant byte first, the bytes 0x56 0x50 0x38 0x30 are the ASCII
 * characters 'V' 'P' '8' '0'. */
25 #define fourcc 0x30385056
/* Sizes, in bytes, of the IVF file header and of the per-frame header.
 * NOTE(review): the header writers visible in this file pass the literals
 * 32 and 12 instead of these macros. */
27 #define IVF_FILE_HDR_SZ (32)
28 #define IVF_FRAME_HDR_SZ (12)
/* Store `val` into the buffer at `mem`. The IVF file-header writer uses it
 * for fields spaced two bytes apart (version, header size, width, height).
 * NOTE(review): the body is not visible in this view; the name suggests the
 * low 16 bits are written least-significant byte first -- confirm. */
30 static void mem_put_le16(char *mem, unsigned int val) {
/* Store `val` into the buffer at `mem`. The IVF header writers use it for
 * fields spaced four bytes apart (fourcc, rate, scale, frame count, frame
 * size, timestamp halves).
 * NOTE(review): the body is not visible in this view; the name suggests 32
 * bits are written least-significant byte first -- confirm. */
35 static void mem_put_le32(char *mem, unsigned int val) {
/* Report an error given as a printf-style format string plus arguments.
 * NOTE(review): most of the body is not visible in this view. Callers do not
 * handle a return from this function, so it presumably terminates the
 * program -- confirm. */
42 static void die(const char *fmt, ...) {
/* Tests whether the format already ends in a newline.
 * NOTE(review): for an empty fmt, strlen(fmt)-1 wraps around and this reads
 * outside the string. */
47 if(fmt[strlen(fmt)-1] != '\n')
/* Print the caller's message `s` together with the codec's error string for
 * `ctx`, followed by the codec's extra error detail.
 * NOTE(review): the lines between and after the two printf calls are not
 * visible in this view; confirm that `detail` is checked for NULL before it
 * is printed, and how the function ends. */
52 static void die_codec(vpx_codec_ctx_t *ctx, const char *s) {
53 const char *detail = vpx_codec_error_detail(ctx);
55 printf("%s: %s\n", s, vpx_codec_error(ctx));
57 printf(" %s\n",detail);
/* Read one raw frame from `f` into `img`.
 *
 * The frame size is taken as w*h*3/2 bytes (one full-size plane plus half as
 * much again), and the whole frame is read with a single fread starting at
 * img->planes[0].
 * NOTE(review): this assumes all planes are stored back to back with no row
 * padding, starting at planes[0] -- confirm against how the image is
 * allocated.
 * NOTE(review): the lines that set the return value are not visible in this
 * view; main stores the result in `frame_avail`. */
61 static int read_frame(FILE *f, vpx_image_t *img) {
62 size_t nbytes, to_read;
65 to_read = img->w*img->h*3/2;
66 nbytes = fread(img->planes[0], 1, to_read, f);
/* A short read means either end of input or a truncated final frame. */
67 if(nbytes != to_read) {
70 printf("Warning: Read partial frame. Check your width & height!\n");
/* Write the 32-byte IVF file header for `cfg` to `outfile`, recording
 * `frame_cnt` as the stream length.
 *
 * Visible field layout (byte offset: content):
 *    4: version (0)          6: header size (32)      8: fourcc
 *   12: width               14: height               16: timebase denominator
 *   20: timebase numerator  24: frame count          28: unused (0)
 * NOTE(review): the remaining parameter, the `header` declaration and the
 * writes to bytes 0..3 are not visible in this view. */
75 static void write_ivf_file_header(FILE *outfile,
76 const vpx_codec_enc_cfg_t *cfg,
/* Only one-pass and last-pass encodes get past this check.
 * NOTE(review): the statement controlled by this `if` is not visible here;
 * presumably an early return -- confirm. */
80 if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
86 mem_put_le16(header+4, 0); /* version */
87 mem_put_le16(header+6, 32); /* headersize */
88 mem_put_le32(header+8, fourcc); /* fourcc */
89 mem_put_le16(header+12, cfg->g_w); /* width */
90 mem_put_le16(header+14, cfg->g_h); /* height */
91 mem_put_le32(header+16, cfg->g_timebase.den); /* rate */
92 mem_put_le32(header+20, cfg->g_timebase.num); /* scale */
93 mem_put_le32(header+24, frame_cnt); /* length */
94 mem_put_le32(header+28, 0); /* unused */
/* NOTE(review): the empty `if` only consumes fwrite's return value; a short
 * or failed write is silently ignored. */
96 if(fwrite(header, 1, 32, outfile));
/* Write the 12-byte IVF frame header for packet `pkt` to `outfile`:
 * bytes 0..3 hold the frame size, bytes 4..7 the low 32 bits of the
 * presentation timestamp and bytes 8..11 its high 32 bits.
 * NOTE(review): the declarations of `header` and `pts` are not visible in
 * this view. */
100 static void write_ivf_frame_header(FILE *outfile,
101 const vpx_codec_cx_pkt_t *pkt)
/* Only compressed-frame packets carry the fields read below.
 * NOTE(review): the statement controlled by this `if` is not visible here;
 * presumably an early return -- confirm. */
106 if(pkt->kind != VPX_CODEC_CX_FRAME_PKT)
109 pts = pkt->data.frame.pts;
110 mem_put_le32(header, pkt->data.frame.sz);
111 mem_put_le32(header+4, pts&0xFFFFFFFF);
112 mem_put_le32(header+8, pts >> 32);
/* NOTE(review): the empty `if` only consumes fwrite's return value; a short
 * or failed write is silently ignored. */
114 if(fwrite(header, 1, 12, outfile));
/* Number of temporal layers for each layering mode, indexed by mode 0..8.
 * main uses it to validate argc and to know how many per-layer bitrate
 * arguments to parse.
 * NOTE(review): each entry has to agree with the cfg.ts_number_layers value
 * that the matching branch of main's switch assigns -- keep them in sync. */
117 static int mode_to_num_layers[9] = {2, 2, 3, 3, 3, 3, 5, 2, 3};
/*
 * Example driver for temporally scalable VP8 encoding.
 *
 * Command line (from the usage string below):
 *   <infile> <outfile> <width> <height> <rate_num> <rate_den> <mode>
 *   <Rate_0> ... <Rate_nlayers-1>
 *
 * From the visible statements: the arguments are validated, an encoder
 * configuration is filled in with real-time settings and one of nine
 * layering patterns, and one output file "<outfile>_<i>.ivf" is opened per
 * layer. Each encoded frame is then appended to the file of its own layer
 * and of every higher-numbered layer. At the end each file header is
 * rewritten with that layer's frame count.
 *
 * NOTE(review): a number of lines of this function (several declarations,
 * the case labels and braces of the switch, parts of some expressions) are
 * not visible in this view; the comments describe only what is visible.
 */
119 int main(int argc, char **argv) {
120 FILE *infile, *outfile[VPX_TS_MAX_LAYERS];
121 vpx_codec_ctx_t codec;
122 vpx_codec_enc_cfg_t cfg;
132 int pts = 0; // PTS starts at 0
133 int frame_duration = 1; // 1 timebase tick per frame
135 int layering_mode = 0;
// Frames written to each layer's file; used to patch the IVF headers at the end
136 int frames_in_layer[VPX_TS_MAX_LAYERS] = {0};
// Encode flags for each slot of the repeating pattern; slot = frame_cnt % flag_periodicity
137 int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
138 int flag_periodicity;
139 int max_intra_size_pct;
141 // Check usage and arguments
143 die("Usage: %s <infile> <outfile> <width> <height> <rate_num> "
144 " <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1>\n", argv[0]);
// Width and height must each be at least 16 and even.
// NOTE(review): strtol is called without an end pointer, so trailing garbage
// in the argument is not detected.
146 width = strtol (argv[3], NULL, 0);
147 height = strtol (argv[4], NULL, 0);
148 if (width < 16 || width%2 || height <16 || height%2)
149 die ("Invalid resolution: %d x %d", width, height);
// NOTE(review): `!sscanf(...)` only catches a return of 0. For an empty
// argument sscanf returns EOF (negative), which passes this test and leaves
// the target at its previous value. The same applies to the other sscanf
// checks below.
151 if (!sscanf(argv[7], "%d", &layering_mode))
152 die ("Invalid mode %s", argv[7]);
153 if (layering_mode<0 || layering_mode>8)
154 die ("Invalid mode (0..8) %s", argv[7]);
// Exactly one target bitrate argument is required per layer of the chosen mode
156 if (argc != 8+mode_to_num_layers[layering_mode])
157 die ("Invalid number of arguments");
159 if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1))
// NOTE(review): width and height are passed but the format has no conversions for them
160 die ("Failed to allocate image", width, height);
162 printf("Using %s\n",vpx_codec_iface_name(interface));
164 // Populate encoder configuration
165 res = vpx_codec_enc_config_default(interface, &cfg, 0);
167 printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
171 // Update the default configuration with our settings
175 // Timebase format e.g. 30fps: numerator=1, denominator=30
// NOTE(review): neither value is range-checked; the numerator is later used
// as a divisor when max_intra_size_pct is computed.
176 if (!sscanf (argv[5], "%d", &cfg.g_timebase.num ))
177 die ("Invalid timebase numerator %s", argv[5]);
178 if (!sscanf (argv[6], "%d", &cfg.g_timebase.den ))
179 die ("Invalid timebase denominator %s", argv[6]);
// Per-layer target bitrates come from argv[8] onwards, one per layer
181 for (i=8; i<8+mode_to_num_layers[layering_mode]; i++)
182 if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8]))
183 die ("Invalid data rate %s", argv[i]);
185 // Real time parameters
186 cfg.rc_dropframe_thresh = 0; // 30
187 cfg.rc_end_usage = VPX_CBR;
188 cfg.rc_resize_allowed = 0;
189 cfg.rc_min_quantizer = 8;
190 cfg.rc_max_quantizer = 56;
191 cfg.rc_undershoot_pct = 100;
192 cfg.rc_overshoot_pct = 15;
193 cfg.rc_buf_initial_sz = 500;
194 cfg.rc_buf_optimal_sz = 600;
195 cfg.rc_buf_sz = 1000;
197 // Enable error resilient mode
198 cfg.g_error_resilient = 1;
199 cfg.g_lag_in_frames = 0;
200 cfg.kf_mode = VPX_KF_DISABLED;
202 // Disable automatic keyframe placement
203 cfg.kf_min_dist = cfg.kf_max_dist = 1000;
205 // Temporal scaling parameters:
206 // NOTE: The 3 prediction frames cannot be used interchangeably due to
207 // differences in the way they are handled throughout the code. The
208 // frames should be allocated to layers in the order LAST, GF, ARF.
209 // Other combinations work, but may produce slightly inferior results.
// Each branch below sets the layer count, the layer-id pattern and its
// period, the per-layer rate decimators, and the per-slot encode flags.
// NOTE(review): the case labels are not visible in this view.
210 switch (layering_mode)
215 // 2-layers, 2-frame period
217 cfg.ts_number_layers = 2;
218 cfg.ts_periodicity = 2;
219 cfg.ts_rate_decimator[0] = 2;
220 cfg.ts_rate_decimator[1] = 1;
221 memcpy(cfg.ts_layer_id, ids, sizeof(ids));
223 flag_periodicity = cfg.ts_periodicity;
225 // 0=L, 1=GF, Intra-layer prediction enabled
226 layer_flags[0] = VPX_EFLAG_FORCE_KF |
227 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
228 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
229 layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
230 VP8_EFLAG_NO_REF_ARF;
232 // 0=L, 1=GF, Intra-layer prediction disabled
233 layer_flags[0] = VPX_EFLAG_FORCE_KF |
234 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
235 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
236 layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
237 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST;
244 // 2-layers, 3-frame period
245 int ids[3] = {0,1,1};
246 cfg.ts_number_layers = 2;
247 cfg.ts_periodicity = 3;
248 cfg.ts_rate_decimator[0] = 3;
249 cfg.ts_rate_decimator[1] = 1;
250 memcpy(cfg.ts_layer_id, ids, sizeof(ids));
252 flag_periodicity = cfg.ts_periodicity;
254 // 0=L, 1=GF, Intra-layer prediction enabled
255 layer_flags[0] = VPX_EFLAG_FORCE_KF |
256 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
257 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
259 layer_flags[2] = VP8_EFLAG_NO_REF_GF |
260 VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
261 VP8_EFLAG_NO_UPD_LAST;
267 // 3-layers, 6-frame period
268 int ids[6] = {0,2,2,1,2,2};
269 cfg.ts_number_layers = 3;
270 cfg.ts_periodicity = 6;
271 cfg.ts_rate_decimator[0] = 6;
272 cfg.ts_rate_decimator[1] = 3;
273 cfg.ts_rate_decimator[2] = 1;
274 memcpy(cfg.ts_layer_id, ids, sizeof(ids));
276 flag_periodicity = cfg.ts_periodicity;
278 // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled
279 layer_flags[0] = VPX_EFLAG_FORCE_KF |
280 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
281 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
282 layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
283 VP8_EFLAG_NO_UPD_LAST;
287 layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
293 // 3-layers, 4-frame period
294 int ids[4] = {0,2,1,2};
295 cfg.ts_number_layers = 3;
296 cfg.ts_periodicity = 4;
297 cfg.ts_rate_decimator[0] = 4;
298 cfg.ts_rate_decimator[1] = 2;
299 cfg.ts_rate_decimator[2] = 1;
300 memcpy(cfg.ts_layer_id, ids, sizeof(ids));
302 flag_periodicity = cfg.ts_periodicity;
304 // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled
305 layer_flags[0] = VPX_EFLAG_FORCE_KF |
306 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
307 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
308 layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
309 VP8_EFLAG_NO_UPD_ARF |
310 VP8_EFLAG_NO_UPD_LAST;
312 layer_flags[3] = VP8_EFLAG_NO_REF_ARF |
313 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
314 VP8_EFLAG_NO_UPD_ARF;
320 // 3-layers, 4-frame period
321 int ids[4] = {0,2,1,2};
322 cfg.ts_number_layers = 3;
323 cfg.ts_periodicity = 4;
324 cfg.ts_rate_decimator[0] = 4;
325 cfg.ts_rate_decimator[1] = 2;
326 cfg.ts_rate_decimator[2] = 1;
327 memcpy(cfg.ts_layer_id, ids, sizeof(ids));
329 flag_periodicity = cfg.ts_periodicity;
331 // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1,
332 // disabled in layer 2
333 layer_flags[0] = VPX_EFLAG_FORCE_KF |
334 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
335 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
336 layer_flags[2] = VP8_EFLAG_NO_REF_ARF |
337 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
339 layer_flags[3] = VP8_EFLAG_NO_REF_ARF |
340 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
341 VP8_EFLAG_NO_UPD_ARF;
347 // 3-layers, 4-frame period
348 int ids[4] = {0,2,1,2};
349 cfg.ts_number_layers = 3;
350 cfg.ts_periodicity = 4;
351 cfg.ts_rate_decimator[0] = 4;
352 cfg.ts_rate_decimator[1] = 2;
353 cfg.ts_rate_decimator[2] = 1;
354 memcpy(cfg.ts_layer_id, ids, sizeof(ids));
356 flag_periodicity = cfg.ts_periodicity;
358 // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled
359 layer_flags[0] = VPX_EFLAG_FORCE_KF |
360 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
361 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
362 layer_flags[2] = VP8_EFLAG_NO_REF_ARF |
363 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
365 layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
371 // NOTE: Probably of academic interest only
373 // 5-layers, 16-frame period
374 int ids[16] = {0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4};
375 cfg.ts_number_layers = 5;
376 cfg.ts_periodicity = 16;
377 cfg.ts_rate_decimator[0] = 16;
378 cfg.ts_rate_decimator[1] = 8;
379 cfg.ts_rate_decimator[2] = 4;
380 cfg.ts_rate_decimator[3] = 2;
381 cfg.ts_rate_decimator[4] = 1;
382 memcpy(cfg.ts_layer_id, ids, sizeof(ids));
384 flag_periodicity = cfg.ts_periodicity;
// Slot 0 carries only the forced-keyframe flag in this pattern
386 layer_flags[0] = VPX_EFLAG_FORCE_KF;
394 layer_flags[15] = VP8_EFLAG_NO_UPD_LAST |
395 VP8_EFLAG_NO_UPD_GF |
396 VP8_EFLAG_NO_UPD_ARF;
400 layer_flags[14] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF;
402 layer_flags[12] = VP8_EFLAG_NO_REF_LAST |
403 VP8_EFLAG_NO_UPD_ARF;
404 layer_flags[8] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF;
// 2-layers, layer ids repeat every 2 frames; the flag pattern repeats every 8
412 cfg.ts_number_layers = 2;
413 cfg.ts_periodicity = 2;
414 cfg.ts_rate_decimator[0] = 2;
415 cfg.ts_rate_decimator[1] = 1;
416 memcpy(cfg.ts_layer_id, ids, sizeof(ids));
418 flag_periodicity = 8;
421 layer_flags[0] = VPX_EFLAG_FORCE_KF |
422 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
423 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
424 layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
425 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
428 layer_flags[6] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
429 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
431 layer_flags[5] = VP8_EFLAG_NO_REF_ARF |
432 VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
433 layer_flags[7] = VP8_EFLAG_NO_REF_ARF |
434 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
435 VP8_EFLAG_NO_UPD_ARF |
436 VP8_EFLAG_NO_UPD_ENTROPY;
// 3-layers, layer ids repeat every 4 frames; the flag pattern repeats every 8
444 int ids[4] = {0,2,1,2};
445 cfg.ts_number_layers = 3;
446 cfg.ts_periodicity = 4;
447 cfg.ts_rate_decimator[0] = 4;
448 cfg.ts_rate_decimator[1] = 2;
449 cfg.ts_rate_decimator[2] = 1;
450 memcpy(cfg.ts_layer_id, ids, sizeof(ids));
452 flag_periodicity = 8;
455 layer_flags[0] = VPX_EFLAG_FORCE_KF |
456 VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
457 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
458 layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
459 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
460 layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
461 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
463 layer_flags[5] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
464 layer_flags[4] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
465 VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
466 layer_flags[6] = VP8_EFLAG_NO_REF_ARF |
467 VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
468 layer_flags[7] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
469 VP8_EFLAG_NO_UPD_ARF |
470 VP8_EFLAG_NO_UPD_ENTROPY;
476 if(!(infile = fopen(argv[1], "rb")))
477 die("Failed to open %s for reading", argv[1]);
479 // Open an output file for each stream
480 for (i=0; i<cfg.ts_number_layers; i++)
// NOTE(review): sprintf is unbounded and argv[2] is caller-controlled; the
// declaration of file_name is not visible here, so confirm it is large enough.
483 sprintf (file_name, "%s_%d.ivf", argv[2], i);
484 if (!(outfile[i] = fopen(file_name, "wb")))
485 die("Failed to open %s for writing", file_name);
// Placeholder header with a frame count of 0; rewritten after encoding
486 write_ivf_file_header(outfile[i], &cfg, 0);
490 if (vpx_codec_enc_init (&codec, interface, &cfg, 0))
491 die_codec (&codec, "Failed to initialize encoder");
493 // Cap CPU & first I-frame size
// NOTE(review): the results of these vpx_codec_control calls are not checked
494 vpx_codec_control (&codec, VP8E_SET_CPUUSED, -6);
495 vpx_codec_control (&codec, VP8E_SET_STATIC_THRESHOLD, 800);
496 vpx_codec_control (&codec, VP8E_SET_NOISE_SENSITIVITY, 2);
// Starts from half the optimal buffer size multiplied by the frame rate
// (timebase den/num); the rest of this expression is not visible in this view.
498 max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5)
499 * ((double) cfg.g_timebase.den / cfg.g_timebase.num)
501 //printf ("max_intra_size_pct=%d\n", max_intra_size_pct);
503 vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
505 // vpx_codec_control (&codec, VP8E_SET_TOKEN_PARTITIONS,
506 // static_cast<vp8e_token_partitions>(_tokenPartitions));
// Encode loop: keeps running while there is input or the encoder still returns data
509 while (frame_avail || got_data) {
510 vpx_codec_iter_t iter = NULL;
511 const vpx_codec_cx_pkt_t *pkt;
// Pick this frame's flags from its slot in the repeating pattern
513 flags = layer_flags[frame_cnt % flag_periodicity];
515 frame_avail = read_frame(infile, &raw);
// A NULL image is passed once no frame is available. The duration is the
// literal 1, matching frame_duration as initialised above.
516 if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts,
517 1, flags, VPX_DL_REALTIME))
518 die_codec(&codec, "Failed to encode frame");
// Clear the forced-keyframe bit in slot 0 so later pattern cycles do not
// force another keyframe; mode 6 is exempt and keeps the bit set.
521 if (layering_mode != 6)
522 layer_flags[0] &= ~VPX_EFLAG_FORCE_KF;
525 while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) {
528 case VPX_CODEC_CX_FRAME_PKT:
// Append the frame to its own layer's file and to every higher layer's file
529 for (i=cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
530 i<cfg.ts_number_layers; i++)
532 write_ivf_frame_header(outfile[i], pkt);
533 if (fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
535 frames_in_layer[i]++;
// Progress output: "K" for a key frame packet, "." for anything else
541 printf (pkt->kind == VPX_CODEC_CX_FRAME_PKT
542 && (pkt->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
546 pts += frame_duration;
// NOTE(review): the increment of frame_cnt is not visible in this view;
// confirm that the -1 adjustment matches where it happens.
551 printf ("Processed %d frames.\n",frame_cnt-1);
552 if (vpx_codec_destroy(&codec))
553 die_codec (&codec, "Failed to destroy codec");
555 // Try to rewrite the output file headers with the actual frame count
556 for (i=0; i<cfg.ts_number_layers; i++)
558 if (!fseek(outfile[i], 0, SEEK_SET))
559 write_ivf_file_header (outfile[i], &cfg, frames_in_layer[i]);