vpx/src/svc_encodeframe.c

   1 /*
   2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 /**
  12  * @file
  13  * VP9 SVC encoding support via libvpx
  14  */
  15
  16 #include <math.h>
  17 #include <stdarg.h>
  18 #include <stdio.h>
  19 #include <stdlib.h>
  20 #include <string.h>
  21 #define VPX_DISABLE_CTRL_TYPECHECKS 1
  22 #define VPX_CODEC_DISABLE_COMPAT 1
  23 #include "vpx/svc_context.h"
  24 #include "vpx/vp8cx.h"
  25 #include "vpx/vpx_encoder.h"
  26
// Portability shims: this file calls strtok_r() and strdup(); on the
// toolchains below those names are mapped onto the CRT equivalents.
#ifdef __MINGW32__
#define strtok_r strtok_s
#ifndef MINGW_HAS_SECURE_API
// proto from /usr/x86_64-w64-mingw32/include/sec_api/string_s.h
_CRTIMP char *__cdecl strtok_s(char *str, const char *delim, char **context);
#endif  /* MINGW_HAS_SECURE_API */
#endif  /* __MINGW32__ */

#ifdef _MSC_VER
#define strdup _strdup
#define strtok_r strtok_s
#endif
  39
// Used when choosing the golden-frame buffer: layers are compared against
// 2 * layers - SVC_REFERENCE_FRAMES.
#define SVC_REFERENCE_FRAMES 8
// Number of per-frame sizes a Superframe index can hold.
#define SUPERFRAME_SLOTS (8)
// One uint32_t per slot plus the leading and trailing marker bytes.
#define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2)
// Capacity of each option string kept in SvcInternal.
#define OPTION_BUFFER_SIZE 256
#define COMPONENTS 4  // psnr & sse statistics maintained for total, y, u, v

// Fallbacks used when no quantizers / scale factors have been supplied.
static const char *DEFAULT_QUANTIZER_VALUES = "60,53,39,33,27";
static const char *DEFAULT_SCALE_FACTORS = "4/16,5/16,7/16,11/16,16/16";
  48
// Private state attached to SvcContext::internal. It is allocated zero-filled
// on first use by get_svc_internal().
typedef struct SvcInternal {
  char options[OPTION_BUFFER_SIZE];        // set by vpx_svc_set_options
  char quantizers[OPTION_BUFFER_SIZE];     // set by vpx_svc_set_quantizers
  char quantizers_keyframe[OPTION_BUFFER_SIZE];  // set by
                                                 // vpx_svc_set_quantizers
  char scale_factors[OPTION_BUFFER_SIZE];  // set by vpx_svc_set_scale_factors

  // values extracted from option, quantizers
  // The parsers store layer i at index
  // i + VPX_SS_MAX_LAYERS - spatial_layers, i.e. aligned to the array end.
  int scaling_factor_num[VPX_SS_MAX_LAYERS];
  int scaling_factor_den[VPX_SS_MAX_LAYERS];
  int quantizer_keyframe[VPX_SS_MAX_LAYERS];
  int quantizer[VPX_SS_MAX_LAYERS];

  // accumulated statistics
  double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS];   // total/Y/U/V
  uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS];
  uint32_t bytes_sum[VPX_SS_MAX_LAYERS];

  // codec encoding values
  int width;    // width of highest layer
  int height;   // height of highest layer
  int kf_dist;  // distance between keyframes

  // state variables
  int encode_frame_count;
  int frame_within_gop;  // 0 marks the key frame of the current group
  vpx_enc_frame_flags_t enc_frame_flags;  // flags for the layer being encoded
  int layers;       // number of spatial layers in use
  int layer;        // index of the layer currently being encoded
  int is_keyframe;  // set per frame from frame_within_gop == 0

  size_t frame_size;
  size_t buffer_size;
  void *buffer;

  char message_buffer[2048];   // log text accumulated by svc_log()
  vpx_codec_ctx_t *codec_ctx;  // set by vpx_svc_init()
} SvcInternal;
  87
// Superframe is used to generate an index of individual frames (i.e., layers)
struct Superframe {
  int count;                               // number of entries used in sizes[]
  uint32_t sizes[SUPERFRAME_SLOTS];        // size in bytes of each frame
  uint32_t magnitude;                      // bitwise OR of all recorded sizes
  uint8_t buffer[SUPERFRAME_BUFFER_SIZE];  // index written by sf_create_index
  size_t index_size;                       // bytes of buffer[] that are valid
};
  96
// One encoded frame layer; instances are chained into a singly linked list.
struct LayerData {
  void *buf;    // compressed data buffer
  size_t size;  // length of compressed data
  struct LayerData *next;  // following layer, or NULL at the end of the list
};
 103
 104 // create LayerData from encoder output
 105 static struct LayerData *ld_create(void *buf, size_t size) {
 106   struct LayerData *const layer_data =
 107       (struct LayerData *)malloc(sizeof(*layer_data));
 108   if (layer_data == NULL) {
 109     return NULL;
 110   }
 111   layer_data->buf = malloc(size);
 112   if (layer_data->buf == NULL) {
 113     free(layer_data);
 114     return NULL;
 115   }
 116   memcpy(layer_data->buf, buf, size);
 117   layer_data->size = size;
 118   return layer_data;
 119 }
 120
 121 // free LayerData
 122 static void ld_free(struct LayerData *layer_data) {
 123   if (layer_data) {
 124     if (layer_data->buf) {
 125       free(layer_data->buf);
 126       layer_data->buf = NULL;
 127     }
 128     free(layer_data);
 129   }
 130 }
 131
 132 // add layer data to list
 133 static void ld_list_add(struct LayerData **list, struct LayerData *layer_data) {
 134   struct LayerData **p = list;
 135
 136   while (*p != NULL) p = &(*p)->next;
 137   *p = layer_data;
 138   layer_data->next = NULL;
 139 }
 140
 141 // get accumulated size of layer data
 142 static size_t ld_list_get_buffer_size(struct LayerData *list) {
 143   struct LayerData *p;
 144   size_t size = 0;
 145
 146   for (p = list; p != NULL; p = p->next) {
 147     size += p->size;
 148   }
 149   return size;
 150 }
 151
 152 // copy layer data to buffer
 153 static void ld_list_copy_to_buffer(struct LayerData *list, uint8_t *buffer) {
 154   struct LayerData *p;
 155
 156   for (p = list; p != NULL; p = p->next) {
 157     buffer[0] = 1;
 158     memcpy(buffer, p->buf, p->size);
 159     buffer += p->size;
 160   }
 161 }
 162
 163 // free layer data list
 164 static void ld_list_free(struct LayerData *list) {
 165   struct LayerData *p = list;
 166
 167   while (p) {
 168     list = list->next;
 169     ld_free(p);
 170     p = list;
 171   }
 172 }
 173
 174 static void sf_create_index(struct Superframe *sf) {
 175   uint8_t marker = 0xc0;
 176   int i;
 177   uint32_t mag, mask;
 178   uint8_t *bufp;
 179
 180   if (sf->count == 0 || sf->count >= 8) return;
 181
 182   // Add the number of frames to the marker byte
 183   marker |= sf->count - 1;
 184
 185   // Choose the magnitude
 186   for (mag = 0, mask = 0xff; mag < 4; ++mag) {
 187     if (sf->magnitude < mask) break;
 188     mask <<= 8;
 189     mask |= 0xff;
 190   }
 191   marker |= mag << 3;
 192
 193   // Write the index
 194   sf->index_size = 2 + (mag + 1) * sf->count;
 195   bufp = sf->buffer;
 196
 197   *bufp++ = marker;
 198   for (i = 0; i < sf->count; ++i) {
 199     int this_sz = sf->sizes[i];
 200     uint32_t j;
 201
 202     for (j = 0; j <= mag; ++j) {
 203       *bufp++ = this_sz & 0xff;
 204       this_sz >>= 8;
 205     }
 206   }
 207   *bufp++ = marker;
 208 }
 209
 210 static SvcInternal *get_svc_internal(SvcContext *svc_ctx) {
 211   if (svc_ctx == NULL) return NULL;
 212   if (svc_ctx->internal == NULL) {
 213     SvcInternal *const si = (SvcInternal *)malloc(sizeof(*si));
 214     if (si != NULL) {
 215       memset(si, 0, sizeof(*si));
 216     }
 217     svc_ctx->internal = si;
 218   }
 219   return (SvcInternal *)svc_ctx->internal;
 220 }
 221
 222 static const SvcInternal *get_const_svc_internal(const SvcContext *svc_ctx) {
 223   if (svc_ctx == NULL) return NULL;
 224   return (const SvcInternal *)svc_ctx->internal;
 225 }
 226
 227 static void svc_log_reset(SvcContext *svc_ctx) {
 228   SvcInternal *const si = (SvcInternal *)svc_ctx->internal;
 229   si->message_buffer[0] = '\0';
 230 }
 231
 232 static int svc_log(SvcContext *svc_ctx, int level, const char *fmt, ...) {
 233   char buf[512];
 234   int retval = 0;
 235   va_list ap;
 236   SvcInternal *const si = get_svc_internal(svc_ctx);
 237
 238   if (level > svc_ctx->log_level) {
 239     return retval;
 240   }
 241
 242   va_start(ap, fmt);
 243   retval = vsnprintf(buf, sizeof(buf), fmt, ap);
 244   va_end(ap);
 245
 246   if (svc_ctx->log_print) {
 247     printf("%s", buf);
 248   } else {
 249     strncat(si->message_buffer, buf,
 250             sizeof(si->message_buffer) - strlen(si->message_buffer) - 1);
 251   }
 252
 253   if (level == SVC_LOG_ERROR) {
 254     si->codec_ctx->err_detail = si->message_buffer;
 255   }
 256   return retval;
 257 }
 258
 259 static vpx_codec_err_t set_option_encoding_mode(SvcContext *svc_ctx,
 260                                                 const char *value_str) {
 261   if (strcmp(value_str, "i") == 0) {
 262     svc_ctx->encoding_mode = INTER_LAYER_PREDICTION_I;
 263   } else if (strcmp(value_str, "alt-ip") == 0) {
 264     svc_ctx->encoding_mode = ALT_INTER_LAYER_PREDICTION_IP;
 265   } else if (strcmp(value_str, "ip") == 0) {
 266     svc_ctx->encoding_mode = INTER_LAYER_PREDICTION_IP;
 267   } else if (strcmp(value_str, "gf") == 0) {
 268     svc_ctx->encoding_mode = USE_GOLDEN_FRAME;
 269   } else {
 270     svc_log(svc_ctx, SVC_LOG_ERROR, "invalid encoding mode: %s", value_str);
 271     return VPX_CODEC_INVALID_PARAM;
 272   }
 273   return VPX_CODEC_OK;
 274 }
 275
 276 static vpx_codec_err_t parse_quantizer_values(SvcContext *svc_ctx,
 277                                               const char *quantizer_values,
 278                                               const int is_keyframe) {
 279   char *input_string;
 280   char *token;
 281   const char *delim = ",";
 282   char *save_ptr;
 283   int found = 0;
 284   int i, q;
 285   vpx_codec_err_t res = VPX_CODEC_OK;
 286   SvcInternal *const si = get_svc_internal(svc_ctx);
 287
 288   if (quantizer_values == NULL || strlen(quantizer_values) == 0) {
 289     if (is_keyframe) {
 290       // If there non settings for key frame, we will apply settings from
 291       // non key frame. So just simply return here.
 292       return VPX_CODEC_INVALID_PARAM;
 293     }
 294     input_string = strdup(DEFAULT_QUANTIZER_VALUES);
 295   } else {
 296     input_string = strdup(quantizer_values);
 297   }
 298
 299   token = strtok_r(input_string, delim, &save_ptr);
 300   for (i = 0; i < svc_ctx->spatial_layers; ++i) {
 301     if (token != NULL) {
 302       q = atoi(token);
 303       if (q <= 0 || q > 100) {
 304         svc_log(svc_ctx, SVC_LOG_ERROR,
 305                 "svc-quantizer-values: invalid value %s\n", token);
 306         res = VPX_CODEC_INVALID_PARAM;
 307         break;
 308       }
 309       token = strtok_r(NULL, delim, &save_ptr);
 310       found = i + 1;
 311     } else {
 312       q = 0;
 313     }
 314     if (is_keyframe) {
 315       si->quantizer_keyframe[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers]
 316       = q;
 317     } else {
 318       si->quantizer[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] = q;
 319     }
 320   }
 321   if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) {
 322     svc_log(svc_ctx, SVC_LOG_ERROR,
 323             "svc: quantizers: %d values required, but only %d specified\n",
 324             svc_ctx->spatial_layers, found);
 325     res = VPX_CODEC_INVALID_PARAM;
 326   }
 327   free(input_string);
 328   return res;
 329 }
 330
 331 static void log_invalid_scale_factor(SvcContext *svc_ctx, const char *value) {
 332   svc_log(svc_ctx, SVC_LOG_ERROR, "svc scale-factors: invalid value %s\n",
 333           value);
 334 }
 335
 336 static vpx_codec_err_t parse_scale_factors(SvcContext *svc_ctx,
 337                                            const char *scale_factors) {
 338   char *input_string;
 339   char *token;
 340   const char *delim = ",";
 341   char *save_ptr;
 342   int found = 0;
 343   int i;
 344   int64_t num, den;
 345   vpx_codec_err_t res = VPX_CODEC_OK;
 346   SvcInternal *const si = get_svc_internal(svc_ctx);
 347
 348   if (scale_factors == NULL || strlen(scale_factors) == 0) {
 349     input_string = strdup(DEFAULT_SCALE_FACTORS);
 350   } else {
 351     input_string = strdup(scale_factors);
 352   }
 353   token = strtok_r(input_string, delim, &save_ptr);
 354   for (i = 0; i < svc_ctx->spatial_layers; ++i) {
 355     num = den = 0;
 356     if (token != NULL) {
 357       num = strtol(token, &token, 10);
 358       if (num <= 0) {
 359         log_invalid_scale_factor(svc_ctx, token);
 360         res = VPX_CODEC_INVALID_PARAM;
 361         break;
 362       }
 363       if (*token++ != '/') {
 364         log_invalid_scale_factor(svc_ctx, token);
 365         res = VPX_CODEC_INVALID_PARAM;
 366         break;
 367       }
 368       den = strtol(token, &token, 10);
 369       if (den <= 0) {
 370         log_invalid_scale_factor(svc_ctx, token);
 371         res = VPX_CODEC_INVALID_PARAM;
 372         break;
 373       }
 374       token = strtok_r(NULL, delim, &save_ptr);
 375       found = i + 1;
 376     }
 377     si->scaling_factor_num[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] =
 378         (int)num;
 379     si->scaling_factor_den[i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers] =
 380         (int)den;
 381   }
 382   if (res == VPX_CODEC_OK && found != svc_ctx->spatial_layers) {
 383     svc_log(svc_ctx, SVC_LOG_ERROR,
 384             "svc: scale-factors: %d values required, but only %d specified\n",
 385             svc_ctx->spatial_layers, found);
 386     res = VPX_CODEC_INVALID_PARAM;
 387   }
 388   free(input_string);
 389   return res;
 390 }
 391
 392 /**
 393  * Parse SVC encoding options
 394  * Format: encoding-mode=<svc_mode>,layers=<layer_count>
 395  *         scale-factors=<n1>/<d1>,<n2>/<d2>,...
 396  *         quantizers=<q1>,<q2>,...
 397  * svc_mode = [i|ip|alt_ip|gf]
 398  */
 399 static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
 400   char *input_string;
 401   char *option_name;
 402   char *option_value;
 403   char *input_ptr;
 404   int is_keyframe_qaunt_set = 0;
 405   vpx_codec_err_t res = VPX_CODEC_OK;
 406
 407   if (options == NULL) return VPX_CODEC_OK;
 408   input_string = strdup(options);
 409
 410   // parse option name
 411   option_name = strtok_r(input_string, "=", &input_ptr);
 412   while (option_name != NULL) {
 413     // parse option value
 414     option_value = strtok_r(NULL, " ", &input_ptr);
 415     if (option_value == NULL) {
 416       svc_log(svc_ctx, SVC_LOG_ERROR, "option missing value: %s\n",
 417               option_name);
 418       res = VPX_CODEC_INVALID_PARAM;
 419       break;
 420     }
 421     if (strcmp("encoding-mode", option_name) == 0) {
 422       res = set_option_encoding_mode(svc_ctx, option_value);
 423       if (res != VPX_CODEC_OK) break;
 424     } else if (strcmp("layers", option_name) == 0) {
 425       svc_ctx->spatial_layers = atoi(option_value);
 426     } else if (strcmp("scale-factors", option_name) == 0) {
 427       res = parse_scale_factors(svc_ctx, option_value);
 428       if (res != VPX_CODEC_OK) break;
 429     } else if (strcmp("quantizers", option_name) == 0) {
 430       res = parse_quantizer_values(svc_ctx, option_value, 0);
 431       if (res != VPX_CODEC_OK) break;
 432       if (!is_keyframe_qaunt_set) {
 433         SvcInternal *const si = get_svc_internal(svc_ctx);
 434         memcpy(get_svc_internal(svc_ctx)->quantizer_keyframe, si->quantizer,
 435                sizeof(si->quantizer));
 436       }
 437     } else if (strcmp("quantizers-keyframe", option_name) == 0) {
 438       res = parse_quantizer_values(svc_ctx, option_value, 1);
 439       if (res != VPX_CODEC_OK) break;
 440       is_keyframe_qaunt_set = 1;
 441     } else {
 442       svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name);
 443       res = VPX_CODEC_INVALID_PARAM;
 444       break;
 445     }
 446     option_name = strtok_r(NULL, "=", &input_ptr);
 447   }
 448   free(input_string);
 449   return res;
 450 }
 451
 452 vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) {
 453   SvcInternal *const si = get_svc_internal(svc_ctx);
 454   if (svc_ctx == NULL || options == NULL || si == NULL) {
 455     return VPX_CODEC_INVALID_PARAM;
 456   }
 457   strncpy(si->options, options, sizeof(si->options));
 458   si->options[sizeof(si->options) - 1] = '\0';
 459   return VPX_CODEC_OK;
 460 }
 461
 462 vpx_codec_err_t vpx_svc_set_quantizers(SvcContext *svc_ctx,
 463                                        const char *quantizers,
 464                                        const int is_for_keyframe) {
 465   SvcInternal *const si = get_svc_internal(svc_ctx);
 466   if (svc_ctx == NULL || quantizers == NULL || si == NULL) {
 467     return VPX_CODEC_INVALID_PARAM;
 468   }
 469   if (is_for_keyframe) {
 470     strncpy(si->quantizers_keyframe, quantizers, sizeof(si->quantizers));
 471     si->quantizers_keyframe[sizeof(si->quantizers_keyframe) - 1] = '\0';
 472   } else {
 473     strncpy(si->quantizers, quantizers, sizeof(si->quantizers));
 474     si->quantizers[sizeof(si->quantizers) - 1] = '\0';
 475   }
 476   return VPX_CODEC_OK;
 477 }
 478
 479 vpx_codec_err_t vpx_svc_set_scale_factors(SvcContext *svc_ctx,
 480                                           const char *scale_factors) {
 481   SvcInternal *const si = get_svc_internal(svc_ctx);
 482   if (svc_ctx == NULL || scale_factors == NULL || si == NULL) {
 483     return VPX_CODEC_INVALID_PARAM;
 484   }
 485   strncpy(si->scale_factors, scale_factors, sizeof(si->scale_factors));
 486   si->scale_factors[sizeof(si->scale_factors) - 1] = '\0';
 487   return VPX_CODEC_OK;
 488 }
 489
 490 vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
 491                              vpx_codec_iface_t *iface,
 492                              vpx_codec_enc_cfg_t *enc_cfg) {
 493   int max_intra_size_pct;
 494   vpx_codec_err_t res;
 495   SvcInternal *const si = get_svc_internal(svc_ctx);
 496   if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL ||
 497       enc_cfg == NULL) {
 498     return VPX_CODEC_INVALID_PARAM;
 499   }
 500   if (si == NULL) return VPX_CODEC_MEM_ERROR;
 501
 502   si->codec_ctx = codec_ctx;
 503
 504   si->width = enc_cfg->g_w;
 505   si->height = enc_cfg->g_h;
 506
 507   if (enc_cfg->kf_max_dist < 2) {
 508     svc_log(svc_ctx, SVC_LOG_ERROR, "key frame distance too small: %d\n",
 509             enc_cfg->kf_max_dist);
 510     return VPX_CODEC_INVALID_PARAM;
 511   }
 512   si->kf_dist = enc_cfg->kf_max_dist;
 513
 514   if (svc_ctx->spatial_layers == 0)
 515     svc_ctx->spatial_layers = VPX_SS_DEFAULT_LAYERS;
 516   if (svc_ctx->spatial_layers < 1 ||
 517       svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS) {
 518     svc_log(svc_ctx, SVC_LOG_ERROR, "spatial layers: invalid value: %d\n",
 519             svc_ctx->spatial_layers);
 520     return VPX_CODEC_INVALID_PARAM;
 521   }
 522   // use SvcInternal value for number of layers to enable forcing single layer
 523   // for first frame
 524   si->layers = svc_ctx->spatial_layers;
 525
 526   res = parse_quantizer_values(svc_ctx, si->quantizers, 0);
 527   if (res != VPX_CODEC_OK) return res;
 528
 529   res = parse_quantizer_values(svc_ctx, si->quantizers_keyframe, 1);
 530   if (res != VPX_CODEC_OK)
 531     memcpy(si->quantizer_keyframe, si->quantizer, sizeof(si->quantizer));
 532
 533   res = parse_scale_factors(svc_ctx, si->scale_factors);
 534   if (res != VPX_CODEC_OK) return res;
 535
 536   // parse aggregate command line options
 537   res = parse_options(svc_ctx, si->options);
 538   if (res != VPX_CODEC_OK) return res;
 539
 540   // Assign target bitrate for each layer. We calculate the ratio
 541   // from the resolution for now.
 542   // TODO(Minghai): Optimize the mechanism of allocating bits after
 543   // implementing svc two pass rate control.
 544   if (si->layers > 1) {
 545     int i;
 546     float total = 0;
 547     float alloc_ratio[VPX_SS_MAX_LAYERS] = {0};
 548
 549     for (i = 0; i < si->layers; ++i) {
 550       int pos = i + VPX_SS_MAX_LAYERS - svc_ctx->spatial_layers;
 551       alloc_ratio[i] = si->scaling_factor_num[pos] * 1.0 /
 552                        si->scaling_factor_den[pos];
 553       alloc_ratio[i] *= alloc_ratio[i];
 554       total += alloc_ratio[i];
 555     }
 556
 557     for (i = 0; i < si->layers; ++i) {
 558       enc_cfg->ss_target_bitrate[i] = enc_cfg->rc_target_bitrate *
 559           alloc_ratio[i] / total;
 560     }
 561   }
 562
 563   // modify encoder configuration
 564   enc_cfg->ss_number_layers = si->layers;
 565   enc_cfg->ts_number_layers = 1;  // Temporal layers not used in this encoder.
 566   enc_cfg->kf_mode = VPX_KF_DISABLED;
 567   enc_cfg->g_pass = VPX_RC_ONE_PASS;
 568   // Lag in frames not currently supported
 569   enc_cfg->g_lag_in_frames = 0;
 570
 571   // TODO(ivanmaltz): determine if these values need to be set explicitly for
 572   // svc, or if the normal default/override mechanism can be used
 573   enc_cfg->rc_dropframe_thresh = 0;
 574   enc_cfg->rc_end_usage = VPX_CBR;
 575   enc_cfg->rc_resize_allowed = 0;
 576   enc_cfg->rc_min_quantizer = 33;
 577   enc_cfg->rc_max_quantizer = 33;
 578   enc_cfg->rc_undershoot_pct = 100;
 579   enc_cfg->rc_overshoot_pct = 15;
 580   enc_cfg->rc_buf_initial_sz = 500;
 581   enc_cfg->rc_buf_optimal_sz = 600;
 582   enc_cfg->rc_buf_sz = 1000;
 583   enc_cfg->g_error_resilient = 1;
 584
 585   // Initialize codec
 586   res = vpx_codec_enc_init(codec_ctx, iface, enc_cfg, VPX_CODEC_USE_PSNR);
 587   if (res != VPX_CODEC_OK) {
 588     svc_log(svc_ctx, SVC_LOG_ERROR, "svc_enc_init error\n");
 589     return res;
 590   }
 591
 592   vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1);
 593   vpx_codec_control(codec_ctx, VP8E_SET_CPUUSED, 1);
 594   vpx_codec_control(codec_ctx, VP8E_SET_STATIC_THRESHOLD, 1);
 595   vpx_codec_control(codec_ctx, VP8E_SET_NOISE_SENSITIVITY, 1);
 596   vpx_codec_control(codec_ctx, VP8E_SET_TOKEN_PARTITIONS, 1);
 597
 598   max_intra_size_pct =
 599       (int)(((double)enc_cfg->rc_buf_optimal_sz * 0.5) *
 600             ((double)enc_cfg->g_timebase.den / enc_cfg->g_timebase.num) / 10.0);
 601   vpx_codec_control(codec_ctx, VP8E_SET_MAX_INTRA_BITRATE_PCT,
 602                     max_intra_size_pct);
 603   return VPX_CODEC_OK;
 604 }
 605
// SVC Algorithm flags - these get mapped to VP8_EFLAG_* defined in vp8cx.h
// (see map_vp8_flags). Each flag is a separate bit so they can be OR-ed.

// encoder should reference the last frame
#define USE_LAST (1 << 0)

// encoder should reference the alt ref frame
#define USE_ARF (1 << 1)

// encoder should reference the golden frame
#define USE_GF (1 << 2)

// encoder should copy current frame to the last frame buffer
#define UPDATE_LAST (1 << 3)

// encoder should copy current frame to the alt ref frame buffer
#define UPDATE_ARF (1 << 4)

// encoder should copy current frame to the golden frame
#define UPDATE_GF (1 << 5)
 625
 626 static int map_vp8_flags(int svc_flags) {
 627   int flags = 0;
 628
 629   if (!(svc_flags & USE_LAST)) flags |= VP8_EFLAG_NO_REF_LAST;
 630   if (!(svc_flags & USE_ARF)) flags |= VP8_EFLAG_NO_REF_ARF;
 631   if (!(svc_flags & USE_GF)) flags |= VP8_EFLAG_NO_REF_GF;
 632
 633   if (svc_flags & UPDATE_LAST) {
 634     // last is updated automatically
 635   } else {
 636     flags |= VP8_EFLAG_NO_UPD_LAST;
 637   }
 638   if (svc_flags & UPDATE_ARF) {
 639     flags |= VP8_EFLAG_FORCE_ARF;
 640   } else {
 641     flags |= VP8_EFLAG_NO_UPD_ARF;
 642   }
 643   if (svc_flags & UPDATE_GF) {
 644     flags |= VP8_EFLAG_FORCE_GF;
 645   } else {
 646     flags |= VP8_EFLAG_NO_UPD_GF;
 647   }
 648   return flags;
 649 }
 650
 651 static void calculate_enc_frame_flags(SvcContext *svc_ctx) {
 652   vpx_enc_frame_flags_t flags = VPX_EFLAG_FORCE_KF;
 653   SvcInternal *const si = get_svc_internal(svc_ctx);
 654   const int is_keyframe = (si->frame_within_gop == 0);
 655
 656   // keyframe layer zero is identical for all modes
 657   if (is_keyframe && si->layer == 0) {
 658     si->enc_frame_flags = VPX_EFLAG_FORCE_KF;
 659     return;
 660   }
 661
 662   switch (svc_ctx->encoding_mode) {
 663     case ALT_INTER_LAYER_PREDICTION_IP:
 664       if (si->layer == 0) {
 665         flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
 666       } else if (is_keyframe) {
 667         if (si->layer == si->layers - 1) {
 668           flags = map_vp8_flags(USE_ARF | UPDATE_LAST);
 669         } else {
 670           flags = map_vp8_flags(USE_ARF | UPDATE_LAST | UPDATE_GF);
 671         }
 672       } else {
 673         flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST);
 674       }
 675       break;
 676     case INTER_LAYER_PREDICTION_I:
 677       if (si->layer == 0) {
 678         flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
 679       } else if (is_keyframe) {
 680         flags = map_vp8_flags(USE_ARF | UPDATE_LAST);
 681       } else {
 682         flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
 683       }
 684       break;
 685     case INTER_LAYER_PREDICTION_IP:
 686       if (si->layer == 0) {
 687         flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
 688       } else if (is_keyframe) {
 689         flags = map_vp8_flags(USE_ARF | UPDATE_LAST);
 690       } else {
 691         flags = map_vp8_flags(USE_LAST | USE_ARF | UPDATE_LAST);
 692       }
 693       break;
 694     case USE_GOLDEN_FRAME:
 695       if (2 * si->layers - SVC_REFERENCE_FRAMES <= si->layer) {
 696         if (si->layer == 0) {
 697           flags = map_vp8_flags(USE_LAST | USE_GF | UPDATE_LAST);
 698         } else if (is_keyframe) {
 699           flags = map_vp8_flags(USE_ARF | UPDATE_LAST | UPDATE_GF);
 700         } else {
 701           flags = map_vp8_flags(USE_LAST | USE_ARF | USE_GF | UPDATE_LAST);
 702         }
 703       } else {
 704         if (si->layer == 0) {
 705           flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
 706         } else if (is_keyframe) {
 707           flags = map_vp8_flags(USE_ARF | UPDATE_LAST);
 708         } else {
 709           flags = map_vp8_flags(USE_LAST | UPDATE_LAST);
 710         }
 711       }
 712       break;
 713     default:
 714       svc_log(svc_ctx, SVC_LOG_ERROR, "unexpected encoding mode: %d\n",
 715               svc_ctx->encoding_mode);
 716       break;
 717   }
 718   si->enc_frame_flags = flags;
 719 }
 720
 721 vpx_codec_err_t vpx_svc_get_layer_resolution(const SvcContext *svc_ctx,
 722                                              int layer,
 723                                              unsigned int *width,
 724                                              unsigned int *height) {
 725   int w, h, index, num, den;
 726   const SvcInternal *const si = get_const_svc_internal(svc_ctx);
 727
 728   if (svc_ctx == NULL || si == NULL || width == NULL || height == NULL) {
 729     return VPX_CODEC_INVALID_PARAM;
 730   }
 731   if (layer < 0 || layer >= si->layers) return VPX_CODEC_INVALID_PARAM;
 732
 733   index = layer + VPX_SS_MAX_LAYERS - si->layers;
 734   num = si->scaling_factor_num[index];
 735   den = si->scaling_factor_den[index];
 736   if (num == 0 || den == 0) return VPX_CODEC_INVALID_PARAM;
 737
 738   w = si->width * num / den;
 739   h = si->height * num / den;
 740
 741   // make height and width even to make chrome player happy
 742   w += w % 2;
 743   h += h % 2;
 744
 745   *width = w;
 746   *height = h;
 747
 748   return VPX_CODEC_OK;
 749 }
 750
 751 static void set_svc_parameters(SvcContext *svc_ctx,
 752                                vpx_codec_ctx_t *codec_ctx) {
 753   int layer, layer_index;
 754   vpx_svc_parameters_t svc_params;
 755   SvcInternal *const si = get_svc_internal(svc_ctx);
 756
 757   memset(&svc_params, 0, sizeof(svc_params));
 758   svc_params.temporal_layer = 0;
 759   svc_params.spatial_layer = si->layer;
 760   svc_params.flags = si->enc_frame_flags;
 761
 762   layer = si->layer;
 763   if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
 764       si->frame_within_gop == 0) {
 765     // layers 1 & 3 don't exist in this mode, use the higher one
 766     if (layer == 0 || layer == 2) {
 767       layer += 1;
 768     }
 769   }
 770   if (VPX_CODEC_OK != vpx_svc_get_layer_resolution(svc_ctx, layer,
 771                                                    &svc_params.width,
 772                                                    &svc_params.height)) {
 773     svc_log(svc_ctx, SVC_LOG_ERROR, "vpx_svc_get_layer_resolution failed\n");
 774   }
 775   layer_index = layer + VPX_SS_MAX_LAYERS - si->layers;
 776
 777   if (vpx_svc_is_keyframe(svc_ctx)) {
 778     svc_params.min_quantizer = si->quantizer_keyframe[layer_index];
 779     svc_params.max_quantizer = si->quantizer_keyframe[layer_index];
 780   } else {
 781     svc_params.min_quantizer = si->quantizer[layer_index];
 782     svc_params.max_quantizer = si->quantizer[layer_index];
 783   }
 784
 785   svc_params.distance_from_i_frame = si->frame_within_gop;
 786
 787   // Use buffer i for layer i LST
 788   svc_params.lst_fb_idx = si->layer;
 789
 790   // Use buffer i-1 for layer i Alt (Inter-layer prediction)
 791   if (si->layer != 0) {
 792     const int use_higher_layer =
 793         svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
 794         si->frame_within_gop == 0;
 795     svc_params.alt_fb_idx = use_higher_layer ? si->layer - 2 : si->layer - 1;
 796   }
 797
 798   if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP) {
 799     svc_params.gld_fb_idx = si->layer + 1;
 800   } else {
 801     if (si->layer < 2 * si->layers - SVC_REFERENCE_FRAMES)
 802       svc_params.gld_fb_idx = svc_params.lst_fb_idx;
 803     else
 804       svc_params.gld_fb_idx = 2 * si->layers - 1 - si->layer;
 805   }
 806
 807   svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, layer: %d, %dx%d, q: %d\n",
 808           si->encode_frame_count, si->layer, svc_params.width,
 809           svc_params.height, svc_params.min_quantizer);
 810
 811   if (svc_params.flags == VPX_EFLAG_FORCE_KF) {
 812     svc_log(svc_ctx, SVC_LOG_DEBUG, "flags == VPX_EFLAG_FORCE_KF\n");
 813   } else {
 814     svc_log(
 815         svc_ctx, SVC_LOG_DEBUG, "Using:    LST/GLD/ALT [%2d|%2d|%2d]\n",
 816         svc_params.flags & VP8_EFLAG_NO_REF_LAST ? -1 : svc_params.lst_fb_idx,
 817         svc_params.flags & VP8_EFLAG_NO_REF_GF ? -1 : svc_params.gld_fb_idx,
 818         svc_params.flags & VP8_EFLAG_NO_REF_ARF ? -1 : svc_params.alt_fb_idx);
 819     svc_log(
 820         svc_ctx, SVC_LOG_DEBUG, "Updating: LST/GLD/ALT [%2d|%2d|%2d]\n",
 821         svc_params.flags & VP8_EFLAG_NO_UPD_LAST ? -1 : svc_params.lst_fb_idx,
 822         svc_params.flags & VP8_EFLAG_NO_UPD_GF ? -1 : svc_params.gld_fb_idx,
 823         svc_params.flags & VP8_EFLAG_NO_UPD_ARF ? -1 : svc_params.alt_fb_idx);
 824   }
 825
 826   vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &svc_params);
 827 }
 828
 829 /**
 830  * Encode a frame into multiple layers
 831  * Create a superframe containing the individual layers
 832  */
 833 vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
 834                                struct vpx_image *rawimg, vpx_codec_pts_t pts,
 835                                int64_t duration, int deadline) {
 836   vpx_codec_err_t res;
 837   vpx_codec_iter_t iter;
 838   const vpx_codec_cx_pkt_t *cx_pkt;
 839   struct LayerData *cx_layer_list = NULL;
 840   struct LayerData *layer_data;
 841   struct Superframe superframe;
 842   SvcInternal *const si = get_svc_internal(svc_ctx);
 843   if (svc_ctx == NULL || codec_ctx == NULL || rawimg == NULL || si == NULL) {
 844     return VPX_CODEC_INVALID_PARAM;
 845   }
 846
 847   memset(&superframe, 0, sizeof(superframe));
 848   svc_log_reset(svc_ctx);
 849
 850   si->layers = svc_ctx->spatial_layers;
 851   if (si->frame_within_gop >= si->kf_dist ||
 852       si->encode_frame_count == 0) {
 853     si->frame_within_gop = 0;
 854   }
 855   si->is_keyframe = (si->frame_within_gop == 0);
 856   si->frame_size = 0;
 857
 858   svc_log(svc_ctx, SVC_LOG_DEBUG,
 859           "vpx_svc_encode  layers: %d, frame_count: %d, frame_within_gop: %d\n",
 860           si->layers, si->encode_frame_count, si->frame_within_gop);
 861
 862   // encode each layer
 863   for (si->layer = 0; si->layer < si->layers; ++si->layer) {
 864     if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
 865         si->is_keyframe && (si->layer == 1 || si->layer == 3)) {
 866       svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer);
 867       continue;
 868     }
 869     calculate_enc_frame_flags(svc_ctx);
 870
 871     set_svc_parameters(svc_ctx, codec_ctx);
 872
 873     res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration,
 874                            si->enc_frame_flags, deadline);
 875     if (res != VPX_CODEC_OK) {
 876       return res;
 877     }
 878     // save compressed data
 879     iter = NULL;
 880     while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) {
 881       switch (cx_pkt->kind) {
 882         case VPX_CODEC_CX_FRAME_PKT: {
 883           const uint32_t frame_pkt_size = (uint32_t)(cx_pkt->data.frame.sz);
 884           si->bytes_sum[si->layer] += frame_pkt_size;
 885           svc_log(svc_ctx, SVC_LOG_DEBUG,
 886                   "SVC frame: %d, layer: %d, size: %u\n",
 887                   si->encode_frame_count, si->layer, frame_pkt_size);
 888           layer_data =
 889               ld_create(cx_pkt->data.frame.buf, (size_t)frame_pkt_size);
 890           if (layer_data == NULL) {
 891             svc_log(svc_ctx, SVC_LOG_ERROR, "Error allocating LayerData\n");
 892             return VPX_CODEC_OK;
 893           }
 894           ld_list_add(&cx_layer_list, layer_data);
 895
 896           // save layer size in superframe index
 897           superframe.sizes[superframe.count++] = frame_pkt_size;
 898           superframe.magnitude |= frame_pkt_size;
 899           break;
 900         }
 901         case VPX_CODEC_PSNR_PKT: {
 902           int i;
 903           svc_log(svc_ctx, SVC_LOG_DEBUG,
 904                   "SVC frame: %d, layer: %d, PSNR(Total/Y/U/V): "
 905                   "%2.3f  %2.3f  %2.3f  %2.3f \n",
 906                   si->encode_frame_count, si->layer,
 907                   cx_pkt->data.psnr.psnr[0], cx_pkt->data.psnr.psnr[1],
 908                   cx_pkt->data.psnr.psnr[2], cx_pkt->data.psnr.psnr[3]);
 909           svc_log(svc_ctx, SVC_LOG_DEBUG,
 910                   "SVC frame: %d, layer: %d, SSE(Total/Y/U/V): "
 911                   "%2.3f  %2.3f  %2.3f  %2.3f \n",
 912                   si->encode_frame_count, si->layer,
 913                   cx_pkt->data.psnr.sse[0], cx_pkt->data.psnr.sse[1],
 914                   cx_pkt->data.psnr.sse[2], cx_pkt->data.psnr.sse[3]);
 915           for (i = 0; i < COMPONENTS; i++) {
 916             si->psnr_sum[si->layer][i] += cx_pkt->data.psnr.psnr[i];
 917             si->sse_sum[si->layer][i] += cx_pkt->data.psnr.sse[i];
 918           }
 919           break;
 920         }
 921         default: {
 922           break;
 923         }
 924       }
 925     }
 926   }
 927   // add superframe index to layer data list
 928   sf_create_index(&superframe);
 929   layer_data = ld_create(superframe.buffer, superframe.index_size);
 930   ld_list_add(&cx_layer_list, layer_data);
 931
 932   // get accumulated size of layer data
 933   si->frame_size = ld_list_get_buffer_size(cx_layer_list);
 934   if (si->frame_size == 0) return VPX_CODEC_ERROR;
 935
 936   // all layers encoded, create single buffer with concatenated layers
 937   if (si->frame_size > si->buffer_size) {
 938     free(si->buffer);
 939     si->buffer = malloc(si->frame_size);
 940     if (si->buffer == NULL) {
 941       ld_list_free(cx_layer_list);
 942       return VPX_CODEC_MEM_ERROR;
 943     }
 944     si->buffer_size = si->frame_size;
 945   }
 946   // copy layer data into packet
 947   ld_list_copy_to_buffer(cx_layer_list, (uint8_t *)si->buffer);
 948
 949   ld_list_free(cx_layer_list);
 950
 951   svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, pts: %d\n",
 952           si->encode_frame_count, si->is_keyframe, (int)si->frame_size,
 953           (int)pts);
 954   ++si->frame_within_gop;
 955   ++si->encode_frame_count;
 956
 957   return VPX_CODEC_OK;
 958 }
 959
 960 const char *vpx_svc_get_message(const SvcContext *svc_ctx) {
 961   const SvcInternal *const si = get_const_svc_internal(svc_ctx);
 962   if (svc_ctx == NULL || si == NULL) return NULL;
 963   return si->message_buffer;
 964 }
 965
 966 void *vpx_svc_get_buffer(const SvcContext *svc_ctx) {
 967   const SvcInternal *const si = get_const_svc_internal(svc_ctx);
 968   if (svc_ctx == NULL || si == NULL) return NULL;
 969   return si->buffer;
 970 }
 971
 972 size_t vpx_svc_get_frame_size(const SvcContext *svc_ctx) {
 973   const SvcInternal *const si = get_const_svc_internal(svc_ctx);
 974   if (svc_ctx == NULL || si == NULL) return 0;
 975   return si->frame_size;
 976 }
 977
 978 int vpx_svc_get_encode_frame_count(const SvcContext *svc_ctx) {
 979   const SvcInternal *const si = get_const_svc_internal(svc_ctx);
 980   if (svc_ctx == NULL || si == NULL) return 0;
 981   return si->encode_frame_count;
 982 }
 983
 984 int vpx_svc_is_keyframe(const SvcContext *svc_ctx) {
 985   const SvcInternal *const si = get_const_svc_internal(svc_ctx);
 986   if (svc_ctx == NULL || si == NULL) return 0;
 987   return si->is_keyframe;
 988 }
 989
 990 void vpx_svc_set_keyframe(SvcContext *svc_ctx) {
 991   SvcInternal *const si = get_svc_internal(svc_ctx);
 992   if (svc_ctx == NULL || si == NULL) return;
 993   si->frame_within_gop = 0;
 994 }
 995
 996 static double calc_psnr(double d) {
 997   if (d == 0) return 100;
 998   return -10.0 * log(d) / log(10.0);
 999 }
1000
1001 // dump accumulated statistics and reset accumulated values
1002 const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) {
1003   int number_of_frames, number_of_keyframes, encode_frame_count;
1004   int i, j;
1005   uint32_t bytes_total = 0;
1006   double scale[COMPONENTS];
1007   double psnr[COMPONENTS];
1008   double mse[COMPONENTS];
1009   double y_scale;
1010
1011   SvcInternal *const si = get_svc_internal(svc_ctx);
1012   if (svc_ctx == NULL || si == NULL) return NULL;
1013
1014   svc_log_reset(svc_ctx);
1015
1016   encode_frame_count = si->encode_frame_count;
1017   if (si->encode_frame_count <= 0) return vpx_svc_get_message(svc_ctx);
1018
1019   svc_log(svc_ctx, SVC_LOG_INFO, "\n");
1020   number_of_keyframes = encode_frame_count / si->kf_dist + 1;
1021   for (i = 0; i < si->layers; ++i) {
1022     number_of_frames = encode_frame_count;
1023
1024     if (svc_ctx->encoding_mode == ALT_INTER_LAYER_PREDICTION_IP &&
1025         (i == 1 || i == 3)) {
1026       number_of_frames -= number_of_keyframes;
1027     }
1028     svc_log(svc_ctx, SVC_LOG_INFO,
1029             "Layer %d Average PSNR=[%2.3f, %2.3f, %2.3f, %2.3f], Bytes=[%u]\n",
1030             i, (double)si->psnr_sum[i][0] / number_of_frames,
1031             (double)si->psnr_sum[i][1] / number_of_frames,
1032             (double)si->psnr_sum[i][2] / number_of_frames,
1033             (double)si->psnr_sum[i][3] / number_of_frames, si->bytes_sum[i]);
1034     // the following psnr calculation is deduced from ffmpeg.c#print_report
1035     y_scale = si->width * si->height * 255.0 * 255.0 * number_of_frames;
1036     scale[1] = y_scale;
1037     scale[2] = scale[3] = y_scale / 4;  // U or V
1038     scale[0] = y_scale * 1.5;           // total
1039
1040     for (j = 0; j < COMPONENTS; j++) {
1041       psnr[j] = calc_psnr(si->sse_sum[i][j] / scale[j]);
1042       mse[j] = si->sse_sum[i][j] * 255.0 * 255.0 / scale[j];
1043     }
1044     svc_log(svc_ctx, SVC_LOG_INFO,
1045             "Layer %d Overall PSNR=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, psnr[0],
1046             psnr[1], psnr[2], psnr[3]);
1047     svc_log(svc_ctx, SVC_LOG_INFO,
1048             "Layer %d Overall MSE=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, mse[0],
1049             mse[1], mse[2], mse[3]);
1050
1051     bytes_total += si->bytes_sum[i];
1052     // clear sums for next time
1053     si->bytes_sum[i] = 0;
1054     for (j = 0; j < COMPONENTS; ++j) {
1055       si->psnr_sum[i][j] = 0;
1056       si->sse_sum[i][j] = 0;
1057     }
1058   }
1059
1060   // only display statistics once
1061   si->encode_frame_count = 0;
1062
1063   svc_log(svc_ctx, SVC_LOG_INFO, "Total Bytes=[%u]\n", bytes_total);
1064   return vpx_svc_get_message(svc_ctx);
1065 }
1066
1067 void vpx_svc_release(SvcContext *svc_ctx) {
1068   SvcInternal *si;
1069   if (svc_ctx == NULL) return;
1070   // do not use get_svc_internal as it will unnecessarily allocate an
1071   // SvcInternal if it was not already allocated
1072   si = (SvcInternal *)svc_ctx->internal;
1073   if (si != NULL) {
1074     free(si->buffer);
1075     free(si);
1076     svc_ctx->internal = NULL;
1077   }
1078 }