AOMedia AV1 Codec
svc_encoder_rtc
/*
 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11 
12 // This is an example demonstrating how to implement a multi-layer AOM
13 // encoding scheme for RTC video applications.
14 
15 #include <assert.h>
16 #include <inttypes.h>
17 #include <limits.h>
18 #include <math.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include <memory>
24 
25 #include "config/aom_config.h"
26 
27 #if CONFIG_AV1_DECODER
28 #include "aom/aom_decoder.h"
29 #endif
30 #include "aom/aom_encoder.h"
31 #include "aom/aom_image.h"
32 #include "aom/aom_integer.h"
33 #include "aom/aomcx.h"
34 #include "aom_dsp/bitwriter_buffer.h"
35 #include "aom_ports/aom_timer.h"
36 #include "av1/ratectrl_rtc.h"
37 #include "common/args.h"
38 #include "common/tools_common.h"
39 #include "common/video_writer.h"
40 #include "examples/encoder_util.h"
41 #include "examples/multilayer_metadata.h"
42 
43 #define OPTION_BUFFER_SIZE 1024
44 #define MAX_NUM_SPATIAL_LAYERS 4
45 
46 typedef struct {
47  const char *output_filename;
48  char options[OPTION_BUFFER_SIZE];
49  struct AvxInputContext input_ctx[MAX_NUM_SPATIAL_LAYERS];
50  int speed;
51  int aq_mode;
52  int layering_mode;
53  int output_obu;
54  int decode;
55  int tune_content;
56  int show_psnr;
57  bool use_external_rc;
58  bool scale_factors_explicitly_set;
59  const char *multilayer_metadata_file;
60 } AppInput;
61 
// Kinds of per-layer option strings. The enumerator values index
// option_min_values[] / option_max_values[]; ALL_OPTION_TYPES is the count.
typedef enum {
  QUANTIZER = 0,
  BITRATE,
  SCALE_FACTOR,
  AUTO_ALT_REF,
  ALL_OPTION_TYPES
} LAYER_OPTION_TYPE;
69 
70 static const arg_def_t outputfile =
71  ARG_DEF("o", "output", 1, "Output filename");
72 static const arg_def_t frames_arg =
73  ARG_DEF("f", "frames", 1, "Number of frames to encode");
74 static const arg_def_t threads_arg =
75  ARG_DEF("th", "threads", 1, "Number of threads to use");
76 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
77 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
78 static const arg_def_t timebase_arg =
79  ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
80 static const arg_def_t bitrate_arg = ARG_DEF(
81  "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
82 static const arg_def_t spatial_layers_arg =
83  ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
84 static const arg_def_t temporal_layers_arg =
85  ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
86 static const arg_def_t layering_mode_arg =
87  ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
88 static const arg_def_t kf_dist_arg =
89  ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
90 static const arg_def_t scale_factors_arg =
91  ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
92 static const arg_def_t min_q_arg =
93  ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
94 static const arg_def_t max_q_arg =
95  ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
96 static const arg_def_t speed_arg =
97  ARG_DEF("sp", "speed", 1, "Speed configuration");
98 static const arg_def_t aqmode_arg =
99  ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
100 static const arg_def_t bitrates_arg =
101  ARG_DEF("bl", "bitrates", 1,
102  "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
103 static const arg_def_t dropframe_thresh_arg =
104  ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
105 static const arg_def_t error_resilient_arg =
106  ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
107 static const arg_def_t output_obu_arg =
108  ARG_DEF(NULL, "output-obu", 1,
109  "Write OBUs when set to 1. Otherwise write IVF files.");
110 static const arg_def_t test_decode_arg =
111  ARG_DEF(NULL, "test-decode", 1,
112  "Attempt to test decoding the output when set to 1. Default is 1.");
113 static const arg_def_t psnr_arg =
114  ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
115 static const arg_def_t ext_rc_arg =
116  ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
117 static const struct arg_enum_list tune_content_enum[] = {
118  { "default", AOM_CONTENT_DEFAULT },
119  { "screen", AOM_CONTENT_SCREEN },
120  { "film", AOM_CONTENT_FILM },
121  { NULL, 0 }
122 };
123 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
124  NULL, "tune-content", 1, "Tune content type", tune_content_enum);
125 #if CONFIG_CWG_E050
126 static const arg_def_t multilayer_metadata_file_arg =
127  ARG_DEF("ml", "multilayer_metadata_file", 1,
128  "Experimental: path to multilayer metadata file");
129 #endif
130 
131 #if CONFIG_AV1_HIGHBITDEPTH
132 static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
133  { "10", AOM_BITS_10 },
134  { NULL, 0 } };
135 
136 static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
137  "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
138 #endif // CONFIG_AV1_HIGHBITDEPTH
139 
140 static const arg_def_t *svc_args[] = {
141  &frames_arg,
142  &outputfile,
143  &width_arg,
144  &height_arg,
145  &timebase_arg,
146  &bitrate_arg,
147  &spatial_layers_arg,
148  &kf_dist_arg,
149  &scale_factors_arg,
150  &min_q_arg,
151  &max_q_arg,
152  &temporal_layers_arg,
153  &layering_mode_arg,
154  &threads_arg,
155  &aqmode_arg,
156 #if CONFIG_AV1_HIGHBITDEPTH
157  &bitdepth_arg,
158 #endif
159  &speed_arg,
160  &bitrates_arg,
161  &dropframe_thresh_arg,
162  &error_resilient_arg,
163  &output_obu_arg,
164  &test_decode_arg,
165  &tune_content_arg,
166  &psnr_arg,
167 #if CONFIG_CWG_E050
168  &multilayer_metadata_file_arg,
169 #endif
170  NULL,
171 };
172 
// Zero-fills an object in place. Dest must be an lvalue whose sizeof is the
// full object size (i.e. not a pointer to the data).
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed in the usage message.
static const char *exec_name;
176 
177 void usage_exit(void) {
178  fprintf(stderr,
179  "Usage: %s <options> input_filename [input_filename ...] -o "
180  "output_filename\n",
181  exec_name);
182  fprintf(stderr, "Options:\n");
183  arg_show_usage(stderr, svc_args);
184  fprintf(
185  stderr,
186  "Input files must be y4m or yuv.\n"
187  "If multiple input files are specified, they correspond to spatial "
188  "layers, and there should be as many as there are spatial layers.\n"
189  "All input files must have the same width, height, frame rate and number "
190  "of frames.\n"
191  "If only one file is specified, it is used for all spatial layers.\n");
192  exit(EXIT_FAILURE);
193 }
194 
// Returns 1 if the first four bytes of `detect` are the Y4M signature "YUV4",
// otherwise 0. `detect` must point to at least four readable bytes.
static int file_is_y4m(const char detect[4]) {
  return memcmp(detect, "YUV4", 4) == 0;
}
198 
// Returns 1 if the first four bytes of `detect` are the IVF signature "DKIF",
// otherwise 0. `detect` must point to at least four readable bytes.
static int fourcc_is_ivf(const char detect[4]) {
  return memcmp(detect, "DKIF", 4) == 0;
}
205 
206 static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
207  1 };
208 
209 static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
210 
211 static void open_input_file(struct AvxInputContext *input,
213  /* Parse certain options from the input file, if possible */
214  input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
215  : set_binary_mode(stdin);
216 
217  if (!input->file) fatal("Failed to open input file");
218 
219  if (!fseeko(input->file, 0, SEEK_END)) {
220  /* Input file is seekable. Figure out how long it is, so we can get
221  * progress info.
222  */
223  input->length = ftello(input->file);
224  rewind(input->file);
225  }
226 
227  /* Default to 1:1 pixel aspect ratio. */
228  input->pixel_aspect_ratio.numerator = 1;
229  input->pixel_aspect_ratio.denominator = 1;
230 
231  /* For RAW input sources, these bytes will applied on the first frame
232  * in read_frame().
233  */
234  input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
235  input->detect.position = 0;
236 
237  if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
238  if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
239  input->only_i420) >= 0) {
240  input->file_type = FILE_TYPE_Y4M;
241  input->width = input->y4m.pic_w;
242  input->height = input->y4m.pic_h;
243  input->pixel_aspect_ratio.numerator = input->y4m.par_n;
244  input->pixel_aspect_ratio.denominator = input->y4m.par_d;
245  input->framerate.numerator = input->y4m.fps_n;
246  input->framerate.denominator = input->y4m.fps_d;
247  input->fmt = input->y4m.aom_fmt;
248  input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
249  } else {
250  fatal("Unsupported Y4M stream.");
251  }
252  } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
253  fatal("IVF is not supported as input.");
254  } else {
255  input->file_type = FILE_TYPE_RAW;
256  }
257 }
258 
259 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
260  int *value0, int *value1) {
261  if (type == SCALE_FACTOR) {
262  *value0 = (int)strtol(input, &input, 10);
263  if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
264  *value1 = (int)strtol(input, &input, 10);
265 
266  if (*value0 < option_min_values[SCALE_FACTOR] ||
267  *value1 < option_min_values[SCALE_FACTOR] ||
268  *value0 > option_max_values[SCALE_FACTOR] ||
269  *value1 > option_max_values[SCALE_FACTOR] ||
270  *value0 > *value1) // num shouldn't be greater than den
272  } else {
273  *value0 = atoi(input);
274  if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
276  }
277  return AOM_CODEC_OK;
278 }
279 
280 static aom_codec_err_t parse_layer_options_from_string(
281  aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
282  int *option0, int *option1) {
284  char *input_string;
285  char *token;
286  const char *delim = ",";
287  int num_layers = svc_params->number_spatial_layers;
288  int i = 0;
289 
290  if (type == BITRATE)
291  num_layers =
292  svc_params->number_spatial_layers * svc_params->number_temporal_layers;
293 
294  if (input == NULL || option0 == NULL ||
295  (option1 == NULL && type == SCALE_FACTOR))
297 
298  const size_t input_length = strlen(input);
299  input_string = reinterpret_cast<char *>(malloc(input_length + 1));
300  if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
301  memcpy(input_string, input, input_length + 1);
302  token = strtok(input_string, delim); // NOLINT
303  for (i = 0; i < num_layers; ++i) {
304  if (token != NULL) {
305  res = extract_option(type, token, option0 + i, option1 + i);
306  if (res != AOM_CODEC_OK) break;
307  token = strtok(NULL, delim); // NOLINT
308  } else {
310  break;
311  }
312  }
313  free(input_string);
314  return res;
315 }
316 
317 static void parse_command_line(int argc, const char **argv_,
318  AppInput *app_input,
319  aom_svc_params_t *svc_params,
320  aom_codec_enc_cfg_t *enc_cfg) {
321  struct arg arg;
322  char **argv = NULL;
323  char **argi = NULL;
324  char **argj = NULL;
325  char string_options[1024] = { 0 };
326 
327  // Default settings
328  svc_params->number_spatial_layers = 1;
329  svc_params->number_temporal_layers = 1;
330  app_input->layering_mode = 0;
331  app_input->output_obu = 0;
332  app_input->decode = 1;
333  enc_cfg->g_threads = 1;
334  enc_cfg->rc_end_usage = AOM_CBR;
335 
336  // process command line options
337  argv = argv_dup(argc - 1, argv_ + 1);
338  if (!argv) {
339  fprintf(stderr, "Error allocating argument list\n");
340  exit(EXIT_FAILURE);
341  }
342  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
343  arg.argv_step = 1;
344 
345  if (arg_match(&arg, &outputfile, argi)) {
346  app_input->output_filename = arg.val;
347  } else if (arg_match(&arg, &width_arg, argi)) {
348  enc_cfg->g_w = arg_parse_uint(&arg);
349  } else if (arg_match(&arg, &height_arg, argi)) {
350  enc_cfg->g_h = arg_parse_uint(&arg);
351  } else if (arg_match(&arg, &timebase_arg, argi)) {
352  enc_cfg->g_timebase = arg_parse_rational(&arg);
353  } else if (arg_match(&arg, &bitrate_arg, argi)) {
354  enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
355  } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
356  svc_params->number_spatial_layers = arg_parse_uint(&arg);
357  } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
358  svc_params->number_temporal_layers = arg_parse_uint(&arg);
359  } else if (arg_match(&arg, &speed_arg, argi)) {
360  app_input->speed = arg_parse_uint(&arg);
361  if (app_input->speed > 11) {
362  aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
363  }
364  } else if (arg_match(&arg, &aqmode_arg, argi)) {
365  app_input->aq_mode = arg_parse_uint(&arg);
366  } else if (arg_match(&arg, &threads_arg, argi)) {
367  enc_cfg->g_threads = arg_parse_uint(&arg);
368  } else if (arg_match(&arg, &layering_mode_arg, argi)) {
369  app_input->layering_mode = arg_parse_int(&arg);
370  } else if (arg_match(&arg, &kf_dist_arg, argi)) {
371  enc_cfg->kf_min_dist = arg_parse_uint(&arg);
372  enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
373  } else if (arg_match(&arg, &scale_factors_arg, argi)) {
374  aom_codec_err_t res = parse_layer_options_from_string(
375  svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
376  svc_params->scaling_factor_den);
377  app_input->scale_factors_explicitly_set = true;
378  if (res != AOM_CODEC_OK) {
379  die("Failed to parse scale factors: %s\n",
381  }
382  } else if (arg_match(&arg, &min_q_arg, argi)) {
383  enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
384  } else if (arg_match(&arg, &max_q_arg, argi)) {
385  enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
386 #if CONFIG_AV1_HIGHBITDEPTH
387  } else if (arg_match(&arg, &bitdepth_arg, argi)) {
388  enc_cfg->g_bit_depth =
389  static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
390  switch (enc_cfg->g_bit_depth) {
391  case AOM_BITS_8:
392  enc_cfg->g_input_bit_depth = 8;
393  enc_cfg->g_profile = 0;
394  break;
395  case AOM_BITS_10:
396  enc_cfg->g_input_bit_depth = 10;
397  enc_cfg->g_profile = 0;
398  break;
399  default:
400  die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
401  }
402 #endif // CONFIG_VP9_HIGHBITDEPTH
403  } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
404  enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
405  } else if (arg_match(&arg, &error_resilient_arg, argi)) {
406  enc_cfg->g_error_resilient = arg_parse_uint(&arg);
407  if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
408  die("Invalid value for error resilient (0, 1): %d.",
409  enc_cfg->g_error_resilient);
410  } else if (arg_match(&arg, &output_obu_arg, argi)) {
411  app_input->output_obu = arg_parse_uint(&arg);
412  if (app_input->output_obu != 0 && app_input->output_obu != 1)
413  die("Invalid value for obu output flag (0, 1): %d.",
414  app_input->output_obu);
415  } else if (arg_match(&arg, &test_decode_arg, argi)) {
416  app_input->decode = arg_parse_uint(&arg);
417  if (app_input->decode != 0 && app_input->decode != 1)
418  die("Invalid value for test decode flag (0, 1): %d.",
419  app_input->decode);
420  } else if (arg_match(&arg, &tune_content_arg, argi)) {
421  app_input->tune_content = arg_parse_enum_or_int(&arg);
422  printf("tune content %d\n", app_input->tune_content);
423  } else if (arg_match(&arg, &psnr_arg, argi)) {
424  app_input->show_psnr = 1;
425  } else if (arg_match(&arg, &ext_rc_arg, argi)) {
426  app_input->use_external_rc = true;
427 #if CONFIG_CWG_E050
428  } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) {
429  app_input->multilayer_metadata_file = arg.val;
430 #endif
431  } else {
432  ++argj;
433  }
434  }
435 
436  // Total bitrate needs to be parsed after the number of layers.
437  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
438  arg.argv_step = 1;
439  if (arg_match(&arg, &bitrates_arg, argi)) {
440  aom_codec_err_t res = parse_layer_options_from_string(
441  svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
442  if (res != AOM_CODEC_OK) {
443  die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
444  }
445  } else {
446  ++argj;
447  }
448  }
449 
450  // There will be a space in front of the string options
451  if (strlen(string_options) > 0)
452  strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
453 
454  // Check for unrecognized options
455  for (argi = argv; *argi; ++argi)
456  if (argi[0][0] == '-' && strlen(argi[0]) > 1)
457  die("Error: Unrecognized option %s\n", *argi);
458 
459  if (argv[0] == NULL) {
460  usage_exit();
461  }
462 
463  int input_count = 0;
464  while (argv[input_count] != NULL && input_count < MAX_NUM_SPATIAL_LAYERS) {
465  app_input->input_ctx[input_count].filename = argv[input_count];
466  ++input_count;
467  }
468  if (input_count > 1 && input_count != svc_params->number_spatial_layers) {
469  die("Error: Number of input files does not match number of spatial layers");
470  }
471  if (argv[input_count] != NULL) {
472  die("Error: Too many input files specified, there should be at most %d",
473  MAX_NUM_SPATIAL_LAYERS);
474  }
475 
476  free(argv);
477 
478  for (int i = 0; i < input_count; ++i) {
479  open_input_file(&app_input->input_ctx[i], AOM_CSP_UNKNOWN);
480  if (app_input->input_ctx[i].file_type == FILE_TYPE_Y4M) {
481  if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
482  // Override these settings with the info from Y4M file.
483  enc_cfg->g_w = app_input->input_ctx[i].width;
484  enc_cfg->g_h = app_input->input_ctx[i].height;
485  // g_timebase is the reciprocal of frame rate.
486  enc_cfg->g_timebase.num = app_input->input_ctx[i].framerate.denominator;
487  enc_cfg->g_timebase.den = app_input->input_ctx[i].framerate.numerator;
488  } else if (enc_cfg->g_w != app_input->input_ctx[i].width ||
489  enc_cfg->g_h != app_input->input_ctx[i].height ||
490  enc_cfg->g_timebase.num !=
491  app_input->input_ctx[i].framerate.denominator ||
492  enc_cfg->g_timebase.den !=
493  app_input->input_ctx[i].framerate.numerator) {
494  die("Error: Input file dimensions and/or frame rate mismatch");
495  }
496  }
497  }
498  if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
499  die("Error: Input file dimensions not set, use -w and -h");
500  }
501 
502  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
503  enc_cfg->g_h % 2)
504  die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
505 
506  printf(
507  "Codec %s\n"
508  "layers: %d\n"
509  "width %u, height: %u\n"
510  "num: %d, den: %d, bitrate: %u\n"
511  "gop size: %u\n",
513  svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
514  enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
515  enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
516 }
517 
// Number of temporal layers for each of the 12 layering modes (index = mode).
static const int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3, 2, 1, 1, 3, 3, 3, 3, 3,
};
// Number of spatial layers for each of the 12 layering modes (index = mode).
static const int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1, 1, 2, 3, 2, 3, 3, 3, 3,
};
524 
525 // For rate control encoding stats.
526 struct RateControlMetrics {
527  // Number of input frames per layer.
528  int layer_input_frames[AOM_MAX_TS_LAYERS];
529  // Number of encoded non-key frames per layer.
530  int layer_enc_frames[AOM_MAX_TS_LAYERS];
531  // Framerate per layer layer (cumulative).
532  double layer_framerate[AOM_MAX_TS_LAYERS];
533  // Target average frame size per layer (per-frame-bandwidth per layer).
534  double layer_pfb[AOM_MAX_LAYERS];
535  // Actual average frame size per layer.
536  double layer_avg_frame_size[AOM_MAX_LAYERS];
537  // Average rate mismatch per layer (|target - actual| / target).
538  double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
539  // Actual encoding bitrate per layer (cumulative across temporal layers).
540  double layer_encoding_bitrate[AOM_MAX_LAYERS];
541  // Average of the short-time encoder actual bitrate.
542  // TODO(marpan): Should we add these short-time stats for each layer?
543  double avg_st_encoding_bitrate;
544  // Variance of the short-time encoder actual bitrate.
545  double variance_st_encoding_bitrate;
546  // Window (number of frames) for computing short-timee encoding bitrate.
547  int window_size;
548  // Number of window measurements.
549  int window_count;
550  int layer_target_bitrate[AOM_MAX_LAYERS];
551 };
552 
// Number of reference buffer slots (size of the refresh[] array).
static const int REF_FRAMES = 8;

// Number of inter references per frame (size of ref_idx[] / reference[]).
static const int INTER_REFS_PER_FRAME = 7;
556 
// Reference frames used in this example encoder.
// The values index ref_idx[] and reference[] in the reference frame config.
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME,
  SVC_LAST3_FRAME,
  SVC_GOLDEN_FRAME,
  SVC_BWDREF_FRAME,
  SVC_ALTREF2_FRAME,
  SVC_ALTREF_FRAME
};
567 
568 static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
569  FILE *f = input_ctx->file;
570  y4m_input *y4m = &input_ctx->y4m;
571  int shortread = 0;
572 
573  if (input_ctx->file_type == FILE_TYPE_Y4M) {
574  if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
575  } else {
576  shortread = read_yuv_frame(input_ctx, img);
577  }
578 
579  return !shortread;
580 }
581 
582 static void close_input_file(struct AvxInputContext *input) {
583  fclose(input->file);
584  if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
585 }
586 
587 // Note: these rate control metrics assume only 1 key frame in the
588 // sequence (i.e., first frame only). So for temporal pattern# 7
589 // (which has key frame for every frame on base layer), the metrics
590 // computation will be off/wrong.
591 // TODO(marpan): Update these metrics to account for multiple key frames
592 // in the stream.
593 static void set_rate_control_metrics(struct RateControlMetrics *rc,
594  double framerate, int ss_number_layers,
595  int ts_number_layers) {
596  int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
597  ts_rate_decimator[0] = 1;
598  if (ts_number_layers == 2) {
599  ts_rate_decimator[0] = 2;
600  ts_rate_decimator[1] = 1;
601  }
602  if (ts_number_layers == 3) {
603  ts_rate_decimator[0] = 4;
604  ts_rate_decimator[1] = 2;
605  ts_rate_decimator[2] = 1;
606  }
607  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
608  // per-frame-bandwidth, for the rate control encoding stats below.
609  for (int sl = 0; sl < ss_number_layers; ++sl) {
610  int i = sl * ts_number_layers;
611  rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
612  rc->layer_pfb[i] =
613  1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
614  for (int tl = 0; tl < ts_number_layers; ++tl) {
615  i = sl * ts_number_layers + tl;
616  if (tl > 0) {
617  rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
618  rc->layer_pfb[i] =
619  1000.0 *
620  (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
621  (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
622  }
623  rc->layer_input_frames[tl] = 0;
624  rc->layer_enc_frames[tl] = 0;
625  rc->layer_encoding_bitrate[i] = 0.0;
626  rc->layer_avg_frame_size[i] = 0.0;
627  rc->layer_avg_rate_mismatch[i] = 0.0;
628  }
629  }
630  rc->window_count = 0;
631  rc->window_size = 15;
632  rc->avg_st_encoding_bitrate = 0.0;
633  rc->variance_st_encoding_bitrate = 0.0;
634 }
635 
636 static void printout_rate_control_summary(struct RateControlMetrics *rc,
637  int frame_cnt, int ss_number_layers,
638  int ts_number_layers) {
639  int tot_num_frames = 0;
640  double perc_fluctuation = 0.0;
641  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
642  printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
643  for (int sl = 0; sl < ss_number_layers; ++sl) {
644  tot_num_frames = 0;
645  for (int tl = 0; tl < ts_number_layers; ++tl) {
646  int i = sl * ts_number_layers + tl;
647  const int num_dropped =
648  tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
649  : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
650  tot_num_frames += rc->layer_input_frames[tl];
651  rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
652  rc->layer_encoding_bitrate[i] /
653  tot_num_frames;
654  rc->layer_avg_frame_size[i] =
655  rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
656  rc->layer_avg_rate_mismatch[i] =
657  100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
658  printf("For layer#: %d %d \n", sl, tl);
659  printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
660  rc->layer_encoding_bitrate[i]);
661  printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
662  rc->layer_avg_frame_size[i]);
663  printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
664  printf(
665  "Number of input frames, encoded (non-key) frames, "
666  "and perc dropped frames: %d %d %f\n",
667  rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
668  100.0 * num_dropped / rc->layer_input_frames[tl]);
669  printf("\n");
670  }
671  }
672  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
673  rc->variance_st_encoding_bitrate =
674  rc->variance_st_encoding_bitrate / rc->window_count -
675  (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
676  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
677  rc->avg_st_encoding_bitrate;
678  printf("Short-time stats, for window of %d frames:\n", rc->window_size);
679  printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
680  rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
681  perc_fluctuation);
682  if (frame_cnt - 1 != tot_num_frames)
683  die("Error: Number of input frames not equal to output!\n");
684 }
685 
686 // Layer pattern configuration.
687 static void set_layer_pattern(
688  int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
689  aom_svc_ref_frame_config_t *ref_frame_config,
690  aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
691  int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
692  // Setting this flag to 1 enables simplex example of
693  // RPS (Reference Picture Selection) for 1 layer.
694  int use_rps_example = 0;
695  int i;
696  int enable_longterm_temporal_ref = 1;
697  int shift = (layering_mode == 8) ? 2 : 0;
698  int simulcast_mode = (layering_mode == 11);
699  *use_svc_control = 1;
700  layer_id->spatial_layer_id = spatial_layer_id;
701  int lag_index = 0;
702  int base_count = superframe_cnt >> 2;
703  ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
704  ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
705  ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
706  // Set the reference map buffer idx for the 7 references:
707  // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
708  // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
709  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
710  for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
711  for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
712 
713  if (ksvc_mode) {
714  // Same pattern as case 9, but the reference strucutre will be constrained
715  // below.
716  layering_mode = 9;
717  }
718  switch (layering_mode) {
719  case 0:
720  if (use_rps_example == 0) {
721  // 1-layer: update LAST on every frame, reference LAST.
722  layer_id->temporal_layer_id = 0;
723  layer_id->spatial_layer_id = 0;
724  ref_frame_config->refresh[0] = 1;
725  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
726  } else {
727  // Pattern of 2 references (ALTREF and GOLDEN) trailing
728  // LAST by 4 and 8 frames, with some switching logic to
729  // sometimes only predict from the longer-term reference
730  //(golden here). This is simple example to test RPS
731  // (reference picture selection).
732  int last_idx = 0;
733  int last_idx_refresh = 0;
734  int gld_idx = 0;
735  int alt_ref_idx = 0;
736  int lag_alt = 4;
737  int lag_gld = 8;
738  layer_id->temporal_layer_id = 0;
739  layer_id->spatial_layer_id = 0;
740  int sh = 8; // slots 0 - 7.
741  // Moving index slot for last: 0 - (sh - 1)
742  if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
743  // Moving index for refresh of last: one ahead for next frame.
744  last_idx_refresh = superframe_cnt % sh;
745  // Moving index for gld_ref, lag behind current by lag_gld
746  if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
747  // Moving index for alt_ref, lag behind LAST by lag_alt frames.
748  if (superframe_cnt > lag_alt)
749  alt_ref_idx = (superframe_cnt - lag_alt) % sh;
750  // Set the ref_idx.
751  // Default all references to slot for last.
752  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
753  ref_frame_config->ref_idx[i] = last_idx;
754  // Set the ref_idx for the relevant references.
755  ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
756  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
757  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
758  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
759  // Refresh this slot, which will become LAST on next frame.
760  ref_frame_config->refresh[last_idx_refresh] = 1;
761  // Reference LAST, ALTREF, and GOLDEN
762  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
763  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
764  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
765  // Switch to only GOLDEN every 300 frames.
766  if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
767  ref_frame_config->reference[SVC_LAST_FRAME] = 0;
768  ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
769  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
770  // Test if the long-term is LAST instead, this is just a renaming
771  // but its tests if encoder behaves the same, whether its
772  // LAST or GOLDEN.
773  if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
774  ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
775  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
776  ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
777  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
778  }
779  }
780  }
781  break;
782  case 1:
783  // 2-temporal layer.
784  // 1 3 5
785  // 0 2 4
786  // Keep golden fixed at slot 3.
787  base_count = superframe_cnt >> 1;
788  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
789  // Cyclically refresh slots 5, 6, 7, for lag alt ref.
790  lag_index = 5;
791  if (base_count > 0) {
792  lag_index = 5 + (base_count % 3);
793  if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
794  }
795  // Set the altref slot to lag_index.
796  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
797  if (superframe_cnt % 2 == 0) {
798  layer_id->temporal_layer_id = 0;
799  // Update LAST on layer 0, reference LAST.
800  ref_frame_config->refresh[0] = 1;
801  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
802  // Refresh lag_index slot, needed for lagging golen.
803  ref_frame_config->refresh[lag_index] = 1;
804  // Refresh GOLDEN every x base layer frames.
805  if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
806  } else {
807  layer_id->temporal_layer_id = 1;
808  // No updates on layer 1, reference LAST (TL0).
809  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
810  }
811  // Always reference golden and altref on TL0.
812  if (layer_id->temporal_layer_id == 0) {
813  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
814  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
815  }
816  break;
817  case 2:
818  // 3-temporal layer:
819  // 1 3 5 7
820  // 2 6
821  // 0 4 8
822  if (superframe_cnt % 4 == 0) {
823  // Base layer.
824  layer_id->temporal_layer_id = 0;
825  // Update LAST on layer 0, reference LAST.
826  ref_frame_config->refresh[0] = 1;
827  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
828  } else if ((superframe_cnt - 1) % 4 == 0) {
829  layer_id->temporal_layer_id = 2;
830  // First top layer: no updates, only reference LAST (TL0).
831  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
832  } else if ((superframe_cnt - 2) % 4 == 0) {
833  layer_id->temporal_layer_id = 1;
834  // Middle layer (TL1): update LAST2, only reference LAST (TL0).
835  ref_frame_config->refresh[1] = 1;
836  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
837  } else if ((superframe_cnt - 3) % 4 == 0) {
838  layer_id->temporal_layer_id = 2;
839  // Second top layer: no updates, only reference LAST.
840  // Set buffer idx for LAST to slot 1, since that was the slot
841  // updated in previous frame. So LAST is TL1 frame.
842  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
843  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
844  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
845  }
846  break;
847  case 3:
848  // 3 TL, same as above, except allow for predicting
849  // off 2 more references (GOLDEN and ALTREF), with
850  // GOLDEN updated periodically, and ALTREF lagging from
851  // LAST from ~4 frames. Both GOLDEN and ALTREF
852  // can only be updated on base temporal layer.
853 
854  // Keep golden fixed at slot 3.
855  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
856  // Cyclically refresh slots 5, 6, 7, for lag altref.
857  lag_index = 5;
858  if (base_count > 0) {
859  lag_index = 5 + (base_count % 3);
860  if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
861  }
862  // Set the altref slot to lag_index.
863  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
864  if (superframe_cnt % 4 == 0) {
865  // Base layer.
866  layer_id->temporal_layer_id = 0;
867  // Update LAST on layer 0, reference LAST.
868  ref_frame_config->refresh[0] = 1;
869  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
870  // Refresh GOLDEN every x ~10 base layer frames.
871  if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
872  // Refresh lag_index slot, needed for lagging altref.
873  ref_frame_config->refresh[lag_index] = 1;
874  } else if ((superframe_cnt - 1) % 4 == 0) {
875  layer_id->temporal_layer_id = 2;
876  // First top layer: no updates, only reference LAST (TL0).
877  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
878  } else if ((superframe_cnt - 2) % 4 == 0) {
879  layer_id->temporal_layer_id = 1;
880  // Middle layer (TL1): update LAST2, only reference LAST (TL0).
881  ref_frame_config->refresh[1] = 1;
882  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
883  } else if ((superframe_cnt - 3) % 4 == 0) {
884  layer_id->temporal_layer_id = 2;
885  // Second top layer: no updates, only reference LAST.
886  // Set buffer idx for LAST to slot 1, since that was the slot
887  // updated in previous frame. So LAST is TL1 frame.
888  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
889  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
890  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
891  }
892  // Every frame can reference GOLDEN AND ALTREF.
893  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
894  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
895  // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
896  if (speed >= 7) {
897  ref_frame_comp_pred->use_comp_pred[2] = 1;
898  ref_frame_comp_pred->use_comp_pred[0] = 1;
899  }
900  break;
901  case 4:
902  // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
903  // only reference GF (not LAST). Other frames only reference LAST.
904  // 1 3 5 7
905  // 2 6
906  // 0 4 8
907  if (superframe_cnt % 4 == 0) {
908  // Base layer.
909  layer_id->temporal_layer_id = 0;
910  // Update LAST on layer 0, only reference LAST.
911  ref_frame_config->refresh[0] = 1;
912  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
913  } else if ((superframe_cnt - 1) % 4 == 0) {
914  layer_id->temporal_layer_id = 2;
915  // First top layer: no updates, only reference LAST (TL0).
916  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
917  } else if ((superframe_cnt - 2) % 4 == 0) {
918  layer_id->temporal_layer_id = 1;
919  // Middle layer (TL1): update GF, only reference LAST (TL0).
920  ref_frame_config->refresh[3] = 1;
921  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
922  } else if ((superframe_cnt - 3) % 4 == 0) {
923  layer_id->temporal_layer_id = 2;
924  // Second top layer: no updates, only reference GF.
925  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
926  }
927  break;
928  case 5:
929  // 2 spatial layers, 1 temporal.
930  layer_id->temporal_layer_id = 0;
931  if (layer_id->spatial_layer_id == 0) {
932  // Reference LAST, update LAST.
933  ref_frame_config->refresh[0] = 1;
934  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
935  } else if (layer_id->spatial_layer_id == 1) {
936  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
937  // and GOLDEN to slot 0. Update slot 1 (LAST).
938  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
939  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
940  ref_frame_config->refresh[1] = 1;
941  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
942  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
943  }
944  break;
945  case 6:
946  // 3 spatial layers, 1 temporal.
947  // Note for this case, we set the buffer idx for all references to be
948  // either LAST or GOLDEN, which are always valid references, since decoder
949  // will check if any of the 7 references is valid scale in
950  // valid_ref_frame_size().
951  layer_id->temporal_layer_id = 0;
952  if (layer_id->spatial_layer_id == 0) {
953  // Reference LAST, update LAST. Set all buffer_idx to 0.
954  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
955  ref_frame_config->ref_idx[i] = 0;
956  ref_frame_config->refresh[0] = 1;
957  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
958  } else if (layer_id->spatial_layer_id == 1) {
959  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
960  // and GOLDEN (and all other refs) to slot 0.
961  // Update slot 1 (LAST).
962  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
963  ref_frame_config->ref_idx[i] = 0;
964  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
965  ref_frame_config->refresh[1] = 1;
966  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
967  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
968  } else if (layer_id->spatial_layer_id == 2) {
969  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
970  // and GOLDEN (and all other refs) to slot 1.
971  // Update slot 2 (LAST).
972  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
973  ref_frame_config->ref_idx[i] = 1;
974  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
975  ref_frame_config->refresh[2] = 1;
976  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
977  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
978  // For 3 spatial layer case: allow for top spatial layer to use
979  // additional temporal reference. Update every 10 frames.
980  if (enable_longterm_temporal_ref) {
981  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
982  ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
983  if (base_count % 10 == 0)
984  ref_frame_config->refresh[REF_FRAMES - 1] = 1;
985  }
986  }
987  break;
988  case 7:
989  // 2 spatial and 3 temporal layer.
990  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
991  if (superframe_cnt % 4 == 0) {
992  // Base temporal layer
993  layer_id->temporal_layer_id = 0;
994  if (layer_id->spatial_layer_id == 0) {
995  // Reference LAST, update LAST
996  // Set all buffer_idx to 0
997  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
998  ref_frame_config->ref_idx[i] = 0;
999  ref_frame_config->refresh[0] = 1;
1000  } else if (layer_id->spatial_layer_id == 1) {
1001  // Reference LAST and GOLDEN.
1002  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1003  ref_frame_config->ref_idx[i] = 0;
1004  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1005  ref_frame_config->refresh[1] = 1;
1006  }
1007  } else if ((superframe_cnt - 1) % 4 == 0) {
1008  // First top temporal enhancement layer.
1009  layer_id->temporal_layer_id = 2;
1010  if (layer_id->spatial_layer_id == 0) {
1011  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1012  ref_frame_config->ref_idx[i] = 0;
1013  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1014  ref_frame_config->refresh[3] = 1;
1015  } else if (layer_id->spatial_layer_id == 1) {
1016  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1017  // GOLDEN (and all other refs) to slot 3.
1018  // No update.
1019  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1020  ref_frame_config->ref_idx[i] = 3;
1021  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1022  }
1023  } else if ((superframe_cnt - 2) % 4 == 0) {
1024  // Middle temporal enhancement layer.
1025  layer_id->temporal_layer_id = 1;
1026  if (layer_id->spatial_layer_id == 0) {
1027  // Reference LAST.
1028  // Set all buffer_idx to 0.
1029  // Set GOLDEN to slot 5 and update slot 5.
1030  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1031  ref_frame_config->ref_idx[i] = 0;
1032  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1033  ref_frame_config->refresh[5 - shift] = 1;
1034  } else if (layer_id->spatial_layer_id == 1) {
1035  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1036  // GOLDEN (and all other refs) to slot 5.
1037  // Set LAST3 to slot 6 and update slot 6.
1038  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1039  ref_frame_config->ref_idx[i] = 5 - shift;
1040  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1041  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1042  ref_frame_config->refresh[6 - shift] = 1;
1043  }
1044  } else if ((superframe_cnt - 3) % 4 == 0) {
1045  // Second top temporal enhancement layer.
1046  layer_id->temporal_layer_id = 2;
1047  if (layer_id->spatial_layer_id == 0) {
1048  // Set LAST to slot 5 and reference LAST.
1049  // Set GOLDEN to slot 3 and update slot 3.
1050  // Set all other buffer_idx to 0.
1051  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1052  ref_frame_config->ref_idx[i] = 0;
1053  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1054  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1055  ref_frame_config->refresh[3] = 1;
1056  } else if (layer_id->spatial_layer_id == 1) {
1057  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1058  // GOLDEN to slot 3. No update.
1059  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1060  ref_frame_config->ref_idx[i] = 0;
1061  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1062  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1063  }
1064  }
1065  break;
1066  case 8:
1067  // 3 spatial and 3 temporal layer.
1068  // Same as case 9 but overalap in the buffer slot updates.
1069  // (shift = 2). The slots 3 and 4 updated by first TL2 are
1070  // reused for update in TL1 superframe.
1071  // Note for this case, frame order hint must be disabled for
1072  // lower resolutios (operating points > 0) to be decoedable.
1073  case 9:
1074  // 3 spatial and 3 temporal layer.
1075  // No overlap in buffer updates between TL2 and TL1.
1076  // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
1077  // Set the references via the svc_ref_frame_config control.
1078  // Always reference LAST.
1079  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1080  if (superframe_cnt % 4 == 0) {
1081  // Base temporal layer.
1082  layer_id->temporal_layer_id = 0;
1083  if (layer_id->spatial_layer_id == 0) {
1084  // Reference LAST, update LAST.
1085  // Set all buffer_idx to 0.
1086  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1087  ref_frame_config->ref_idx[i] = 0;
1088  ref_frame_config->refresh[0] = 1;
1089  } else if (layer_id->spatial_layer_id == 1) {
1090  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1091  // GOLDEN (and all other refs) to slot 0.
1092  // Update slot 1 (LAST).
1093  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1094  ref_frame_config->ref_idx[i] = 0;
1095  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1096  ref_frame_config->refresh[1] = 1;
1097  } else if (layer_id->spatial_layer_id == 2) {
1098  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1099  // GOLDEN (and all other refs) to slot 1.
1100  // Update slot 2 (LAST).
1101  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1102  ref_frame_config->ref_idx[i] = 1;
1103  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1104  ref_frame_config->refresh[2] = 1;
1105  }
1106  } else if ((superframe_cnt - 1) % 4 == 0) {
1107  // First top temporal enhancement layer.
1108  layer_id->temporal_layer_id = 2;
1109  if (layer_id->spatial_layer_id == 0) {
1110  // Reference LAST (slot 0).
1111  // Set GOLDEN to slot 3 and update slot 3.
1112  // Set all other buffer_idx to slot 0.
1113  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1114  ref_frame_config->ref_idx[i] = 0;
1115  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1116  ref_frame_config->refresh[3] = 1;
1117  } else if (layer_id->spatial_layer_id == 1) {
1118  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1119  // GOLDEN (and all other refs) to slot 3.
1120  // Set LAST2 to slot 4 and Update slot 4.
1121  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1122  ref_frame_config->ref_idx[i] = 3;
1123  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1124  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1125  ref_frame_config->refresh[4] = 1;
1126  } else if (layer_id->spatial_layer_id == 2) {
1127  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1128  // GOLDEN (and all other refs) to slot 4.
1129  // No update.
1130  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1131  ref_frame_config->ref_idx[i] = 4;
1132  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1133  }
1134  } else if ((superframe_cnt - 2) % 4 == 0) {
1135  // Middle temporal enhancement layer.
1136  layer_id->temporal_layer_id = 1;
1137  if (layer_id->spatial_layer_id == 0) {
1138  // Reference LAST.
1139  // Set all buffer_idx to 0.
1140  // Set GOLDEN to slot 5 and update slot 5.
1141  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1142  ref_frame_config->ref_idx[i] = 0;
1143  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1144  ref_frame_config->refresh[5 - shift] = 1;
1145  } else if (layer_id->spatial_layer_id == 1) {
1146  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1147  // GOLDEN (and all other refs) to slot 5.
1148  // Set LAST3 to slot 6 and update slot 6.
1149  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1150  ref_frame_config->ref_idx[i] = 5 - shift;
1151  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1152  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1153  ref_frame_config->refresh[6 - shift] = 1;
1154  } else if (layer_id->spatial_layer_id == 2) {
1155  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1156  // GOLDEN (and all other refs) to slot 6.
1157  // Set LAST3 to slot 7 and update slot 7.
1158  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1159  ref_frame_config->ref_idx[i] = 6 - shift;
1160  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1161  ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1162  ref_frame_config->refresh[7 - shift] = 1;
1163  }
1164  } else if ((superframe_cnt - 3) % 4 == 0) {
1165  // Second top temporal enhancement layer.
1166  layer_id->temporal_layer_id = 2;
1167  if (layer_id->spatial_layer_id == 0) {
1168  // Set LAST to slot 5 and reference LAST.
1169  // Set GOLDEN to slot 3 and update slot 3.
1170  // Set all other buffer_idx to 0.
1171  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1172  ref_frame_config->ref_idx[i] = 0;
1173  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1174  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1175  ref_frame_config->refresh[3] = 1;
1176  } else if (layer_id->spatial_layer_id == 1) {
1177  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1178  // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1179  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1180  ref_frame_config->ref_idx[i] = 0;
1181  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1182  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1183  ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1184  ref_frame_config->refresh[4] = 1;
1185  } else if (layer_id->spatial_layer_id == 2) {
1186  // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1187  // GOLDEN to slot 4. No update.
1188  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1189  ref_frame_config->ref_idx[i] = 0;
1190  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1191  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1192  }
1193  }
1194  break;
1195  case 11:
1196  // Simulcast mode for 3 spatial and 3 temporal layers.
1197  // No inter-layer predicton, only prediction is temporal and single
1198  // reference (LAST).
1199  // No overlap in buffer slots between spatial layers. So for example,
1200  // SL0 only uses slots 0 and 1.
1201  // SL1 only uses slots 2 and 3.
1202  // SL2 only uses slots 4 and 5.
1203  // All 7 references for each inter-frame must only access buffer slots
1204  // for that spatial layer.
1205  // On key (super)frames: SL1 and SL2 must have no references set
1206  // and must refresh all the slots for that layer only (so 2 and 3
1207  // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1208  // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1209  // internally as Intra-only frames that allow that stream to be decoded.
1210  // These conditions will allow for each spatial stream to be
1211  // independently decodeable.
1212 
1213  // Initialize all references to 0 (don't use reference).
1214  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1215  ref_frame_config->reference[i] = 0;
1216  // Initialize as no refresh/update for all slots.
1217  for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1218  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1219  ref_frame_config->ref_idx[i] = 0;
1220 
1221  if (is_key_frame) {
1222  if (layer_id->spatial_layer_id == 0) {
1223  // Assign LAST/GOLDEN to slot 0/1.
1224  // Refesh slots 0 and 1 for SL0.
1225  // SL0: this will get set to KEY frame internally.
1226  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1227  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1228  ref_frame_config->refresh[0] = 1;
1229  ref_frame_config->refresh[1] = 1;
1230  } else if (layer_id->spatial_layer_id == 1) {
1231  // Assign LAST/GOLDEN to slot 2/3.
1232  // Refesh slots 2 and 3 for SL1.
1233  // This will get set to Intra-only frame internally.
1234  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1235  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1236  ref_frame_config->refresh[2] = 1;
1237  ref_frame_config->refresh[3] = 1;
1238  } else if (layer_id->spatial_layer_id == 2) {
1239  // Assign LAST/GOLDEN to slot 4/5.
1240  // Refresh slots 4 and 5 for SL2.
1241  // This will get set to Intra-only frame internally.
1242  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1243  ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1244  ref_frame_config->refresh[4] = 1;
1245  ref_frame_config->refresh[5] = 1;
1246  }
1247  } else if (superframe_cnt % 4 == 0) {
1248  // Base temporal layer: TL0
1249  layer_id->temporal_layer_id = 0;
1250  if (layer_id->spatial_layer_id == 0) { // SL0
1251  // Reference LAST. Assign all references to either slot
1252  // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1253  // Update slot 0 (LAST).
1254  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1255  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1256  ref_frame_config->ref_idx[i] = 1;
1257  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1258  ref_frame_config->refresh[0] = 1;
1259  } else if (layer_id->spatial_layer_id == 1) { // SL1
1260  // Reference LAST. Assign all references to either slot
1261  // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1262  // Update slot 2 (LAST).
1263  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1264  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1265  ref_frame_config->ref_idx[i] = 3;
1266  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1267  ref_frame_config->refresh[2] = 1;
1268  } else if (layer_id->spatial_layer_id == 2) { // SL2
1269  // Reference LAST. Assign all references to either slot
1270  // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1271  // Update slot 4 (LAST).
1272  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1273  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1274  ref_frame_config->ref_idx[i] = 5;
1275  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1276  ref_frame_config->refresh[4] = 1;
1277  }
1278  } else if ((superframe_cnt - 1) % 4 == 0) {
1279  // First top temporal enhancement layer: TL2
1280  layer_id->temporal_layer_id = 2;
1281  if (layer_id->spatial_layer_id == 0) { // SL0
1282  // Reference LAST (slot 0). Assign other references to slot 1.
1283  // No update/refresh on any slots.
1284  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1285  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1286  ref_frame_config->ref_idx[i] = 1;
1287  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1288  } else if (layer_id->spatial_layer_id == 1) { // SL1
1289  // Reference LAST (slot 2). Assign other references to slot 3.
1290  // No update/refresh on any slots.
1291  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1292  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1293  ref_frame_config->ref_idx[i] = 3;
1294  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1295  } else if (layer_id->spatial_layer_id == 2) { // SL2
1296  // Reference LAST (slot 4). Assign other references to slot 4.
1297  // No update/refresh on any slots.
1298  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1299  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1300  ref_frame_config->ref_idx[i] = 5;
1301  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1302  }
1303  } else if ((superframe_cnt - 2) % 4 == 0) {
1304  // Middle temporal enhancement layer: TL1
1305  layer_id->temporal_layer_id = 1;
1306  if (layer_id->spatial_layer_id == 0) { // SL0
1307  // Reference LAST (slot 0).
1308  // Set GOLDEN to slot 1 and update slot 1.
1309  // This will be used as reference for next TL2.
1310  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1311  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1312  ref_frame_config->ref_idx[i] = 1;
1313  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1314  ref_frame_config->refresh[1] = 1;
1315  } else if (layer_id->spatial_layer_id == 1) { // SL1
1316  // Reference LAST (slot 2).
1317  // Set GOLDEN to slot 3 and update slot 3.
1318  // This will be used as reference for next TL2.
1319  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1320  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1321  ref_frame_config->ref_idx[i] = 3;
1322  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1323  ref_frame_config->refresh[3] = 1;
1324  } else if (layer_id->spatial_layer_id == 2) { // SL2
1325  // Reference LAST (slot 4).
1326  // Set GOLDEN to slot 5 and update slot 5.
1327  // This will be used as reference for next TL2.
1328  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1329  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1330  ref_frame_config->ref_idx[i] = 5;
1331  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1332  ref_frame_config->refresh[5] = 1;
1333  }
1334  } else if ((superframe_cnt - 3) % 4 == 0) {
1335  // Second top temporal enhancement layer: TL2
1336  layer_id->temporal_layer_id = 2;
1337  if (layer_id->spatial_layer_id == 0) { // SL0
1338  // Reference LAST (slot 1). Assign other references to slot 0.
1339  // No update/refresh on any slots.
1340  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1341  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1342  ref_frame_config->ref_idx[i] = 0;
1343  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1344  } else if (layer_id->spatial_layer_id == 1) { // SL1
1345  // Reference LAST (slot 3). Assign other references to slot 2.
1346  // No update/refresh on any slots.
1347  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1348  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1349  ref_frame_config->ref_idx[i] = 2;
1350  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1351  } else if (layer_id->spatial_layer_id == 2) { // SL2
1352  // Reference LAST (slot 5). Assign other references to slot 4.
1353  // No update/refresh on any slots.
1354  ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1355  for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1356  ref_frame_config->ref_idx[i] = 4;
1357  ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1358  }
1359  }
1360  if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1361  // Always reference GOLDEN (inter-layer prediction).
1362  ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1363  if (ksvc_mode) {
1364  // KSVC: only keep the inter-layer reference (GOLDEN) for
1365  // superframes whose base is key.
1366  if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1367  }
1368  if (is_key_frame && layer_id->spatial_layer_id > 1) {
1369  // On superframes whose base is key: remove LAST to avoid prediction
1370  // off layer two levels below.
1371  ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1372  }
1373  }
1374  // For 3 spatial layer case 8 (where there is free buffer slot):
1375  // allow for top spatial layer to use additional temporal reference.
1376  // Additional reference is only updated on base temporal layer, every
1377  // 10 TL0 frames here.
1378  if (!simulcast_mode && enable_longterm_temporal_ref &&
1379  layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1380  ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1381  if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1382  if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1383  ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1384  }
1385  break;
1386  default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1387  }
1388 }
1389 
1390 static void write_literal(struct aom_write_bit_buffer *wb, uint32_t data,
1391  uint8_t bits, uint32_t offset = 0) {
1392  if (bits > 32) {
1393  die("Invalid bits value %d > 32\n", bits);
1394  }
1395  const uint32_t max = static_cast<uint32_t>(((uint64_t)1 << bits) - 1);
1396  if (data < offset || (data - offset) > max) {
1397  die("Invalid data, value %u out of range [%u, %" PRIu64 "]\n", data, offset,
1398  (uint64_t)max + offset);
1399  }
1400  aom_wb_write_unsigned_literal(wb, data - offset, bits);
1401 }
1402 
1403 static void write_depth_representation_element(
1404  struct aom_write_bit_buffer *buffer,
1405  const std::pair<libaom_examples::DepthRepresentationElement, bool>
1406  &element) {
1407  if (!element.second) {
1408  return;
1409  }
1410  write_literal(buffer, element.first.sign_flag, 1);
1411  write_literal(buffer, element.first.exponent, 7);
1412  if (element.first.mantissa_len == 0 || element.first.mantissa_len > 32) {
1413  die("Invalid mantissan_len %d\n", element.first.mantissa_len);
1414  }
1415  write_literal(buffer, element.first.mantissa_len - 1, 5);
1416  write_literal(buffer, element.first.mantissa, element.first.mantissa_len);
1417 }
1418 
1419 static void write_color_properties(
1420  struct aom_write_bit_buffer *buffer,
1421  const std::pair<libaom_examples::ColorProperties, bool> &color_properties) {
1422  write_literal(buffer, color_properties.second, 1);
1423  if (color_properties.second) {
1424  write_literal(buffer, color_properties.first.color_range, 1);
1425  write_literal(buffer, color_properties.first.color_primaries, 8);
1426  write_literal(buffer, color_properties.first.transfer_characteristics, 8);
1427  write_literal(buffer, color_properties.first.matrix_coefficients, 8);
1428  } else {
1429  write_literal(buffer, 0, 1); // reserved_1bit
1430  }
1431 }
1432 
1433 static void add_multilayer_metadata(
1434  aom_image_t *frame, const libaom_examples::MultilayerMetadata &multilayer) {
1435  // Pretty large buffer to accommodate the largest multilayer metadata
1436  // possible, with 4 alpha segmentation layers (each can be up to about 66kB).
1437  std::vector<uint8_t> data(66000 * multilayer.layers.size());
1438  struct aom_write_bit_buffer buffer = { data.data(), 0 };
1439 
1440  write_literal(&buffer, multilayer.use_case, 6);
1441  if (multilayer.layers.empty()) {
1442  die("Invalid multilayer metadata, no layers found\n");
1443  } else if (multilayer.layers.size() > MAX_NUM_SPATIAL_LAYERS) {
1444  die("Invalid multilayer metadata, too many layers (max is %d)\n",
1445  MAX_NUM_SPATIAL_LAYERS);
1446  }
1447  write_literal(&buffer, (int)multilayer.layers.size() - 1, 2);
1448  assert(buffer.bit_offset % 8 == 0);
1449  for (size_t i = 0; i < multilayer.layers.size(); ++i) {
1450  const libaom_examples::LayerMetadata &layer = multilayer.layers[i];
1451  // Alpha info with segmentation with labels can be up to about 66k bytes,
1452  // which requires 3 bytes to encode in leb128.
1453  const int bytes_reserved_for_size = 3;
1454  // Placeholder for layer_metadata_size which will be written later.
1455  write_literal(&buffer, 0, bytes_reserved_for_size * 8);
1456  const uint32_t metadata_start = buffer.bit_offset;
1457  write_literal(&buffer, (int)i, 2); // ml_spatial_id
1458  write_literal(&buffer, layer.layer_type, 5);
1459  write_literal(&buffer, layer.luma_plane_only_flag, 1);
1460  write_literal(&buffer, layer.layer_view_type, 3);
1461  write_literal(&buffer, layer.group_id, 2);
1462  write_literal(&buffer, layer.layer_dependency_idc, 3);
1463  write_literal(&buffer, layer.layer_metadata_scope, 2);
1464  write_literal(&buffer, 0, 4); // ml_reserved_4bits
1465 
1466  if (i > 0) {
1467  write_color_properties(&buffer, layer.layer_color_description);
1468  } else {
1469  write_literal(&buffer, 0, 2); // ml_reserved_2bits
1470  }
1471  assert(buffer.bit_offset % 8 == 0);
1472 
1473  if (layer.layer_type == libaom_examples::MULTILAYER_LAYER_TYPE_ALPHA &&
1474  layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1475  const libaom_examples::AlphaInformation &alpha_info =
1476  layer.global_alpha_info;
1477  write_literal(&buffer, alpha_info.alpha_use_idc, 3);
1478  write_literal(&buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8);
1479  write_literal(&buffer, alpha_info.alpha_clip_idc, 2);
1480  write_literal(&buffer, alpha_info.alpha_incr_flag, 1);
1481  write_literal(&buffer, alpha_info.alpha_transparent_value,
1482  alpha_info.alpha_bit_depth + 1);
1483  write_literal(&buffer, alpha_info.alpha_opaque_value,
1484  alpha_info.alpha_bit_depth + 1);
1485  if (buffer.bit_offset % 8 != 0) {
1486  // ai_byte_alignment_bits
1487  write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1488  }
1489  assert(buffer.bit_offset % 8 == 0);
1490 
1491  if (alpha_info.alpha_use_idc == libaom_examples::ALPHA_STRAIGHT) {
1492  write_literal(&buffer, 0, 6); // ai_reserved_6bits
1493  write_color_properties(&buffer, alpha_info.alpha_color_description);
1494  } else if (alpha_info.alpha_use_idc ==
1495  libaom_examples::ALPHA_SEGMENTATION) {
1496  write_literal(&buffer, 0, 7); // ai_reserved_7bits
1497  write_literal(&buffer, !alpha_info.label_type_id.empty(), 1);
1498  if (!alpha_info.label_type_id.empty()) {
1499  const size_t num_values =
1500  std::abs(alpha_info.alpha_transparent_value -
1501  alpha_info.alpha_opaque_value) +
1502  1;
1503  if (!alpha_info.label_type_id.empty() &&
1504  alpha_info.label_type_id.size() != num_values) {
1505  die("Invalid multilayer metadata, label_type_id size must be "
1506  "equal to the range of alpha values between "
1507  "alpha_transparent_value and alpha_opaque_value (expected "
1508  "%d values, found %d values)\n",
1509  (int)num_values, (int)alpha_info.label_type_id.size());
1510  }
1511  for (size_t j = 0; j < num_values; ++j) {
1512  write_literal(&buffer, alpha_info.label_type_id[j], 16);
1513  }
1514  }
1515  }
1516  assert(buffer.bit_offset % 8 == 0);
1517  } else if (layer.layer_type ==
1518  libaom_examples::MULTILAYER_LAYER_TYPE_DEPTH &&
1519  layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1520  const libaom_examples::DepthInformation &depth_info =
1521  layer.global_depth_info;
1522  write_literal(&buffer, depth_info.z_near.second, 1);
1523  write_literal(&buffer, depth_info.z_far.second, 1);
1524  write_literal(&buffer, depth_info.d_min.second, 1);
1525  write_literal(&buffer, depth_info.d_max.second, 1);
1526  write_literal(&buffer, depth_info.depth_representation_type, 4);
1527  if (depth_info.d_min.second || depth_info.d_max.second) {
1528  write_literal(&buffer, depth_info.disparity_ref_view_id, 2);
1529  }
1530  write_depth_representation_element(&buffer, depth_info.z_near);
1531  write_depth_representation_element(&buffer, depth_info.z_far);
1532  write_depth_representation_element(&buffer, depth_info.d_min);
1533  write_depth_representation_element(&buffer, depth_info.d_max);
1534  if (depth_info.depth_representation_type == 3) {
1535  write_literal(&buffer, depth_info.depth_nonlinear_precision, 4,
1536  /*offset=*/8);
1537  if (depth_info.depth_nonlinear_representation_model.empty() ||
1538  depth_info.depth_nonlinear_representation_model.size() > (1 << 6)) {
1539  die("Invalid multilayer metadata, if depth_nonlinear_precision "
1540  "== 3, depth_nonlinear_representation_model must have 1 to "
1541  "%d elements, found %d elements\n",
1542  1 << 6,
1543  (int)depth_info.depth_nonlinear_representation_model.size());
1544  }
1545  write_literal(
1546  &buffer,
1547  (int)depth_info.depth_nonlinear_representation_model.size() - 1, 6);
1548  const int bit_depth = depth_info.depth_nonlinear_precision;
1549  for (const uint32_t v :
1550  depth_info.depth_nonlinear_representation_model) {
1551  write_literal(&buffer, v, bit_depth);
1552  }
1553  }
1554  if (buffer.bit_offset % 8 != 0) {
1555  write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1556  }
1557  assert(buffer.bit_offset % 8 == 0);
1558  }
1559 
1560  assert(buffer.bit_offset % 8 == 0);
1561 
1562  const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8;
1563  const uint8_t size_pos = metadata_start / 8 - bytes_reserved_for_size;
1564  size_t coded_size;
1565  if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size,
1566  bytes_reserved_for_size,
1567  &buffer.bit_buffer[size_pos], &coded_size)) {
1568  // Need to increase bytes_reserved_for_size in the code above.
1569  die("Error: Failed to write metadata size\n");
1570  }
1571  }
1572  assert(buffer.bit_offset % 8 == 0);
1573  if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/,
1574  buffer.bit_buffer, buffer.bit_offset / 8,
1575  AOM_MIF_KEY_FRAME)) {
1576  die("Error: Failed to add metadata\n");
1577  }
1578 }
1579 
1580 #if CONFIG_AV1_DECODER
1581 // Returns whether there is a mismatch between the encoder's new frame and the
1582 // decoder's new frame.
1583 static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1584  const int frames_out) {
1585  aom_image_t enc_img, dec_img;
1586  int mismatch = 0;
1587 
1588  /* Get the internal new frame */
1591 
1592 #if CONFIG_AV1_HIGHBITDEPTH
1593  if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1594  (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1595  if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1596  aom_image_t enc_hbd_img;
1597  aom_img_alloc(
1598  &enc_hbd_img,
1599  static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1600  enc_img.d_w, enc_img.d_h, 16);
1601  aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1602  enc_img = enc_hbd_img;
1603  }
1604  if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1605  aom_image_t dec_hbd_img;
1606  aom_img_alloc(
1607  &dec_hbd_img,
1608  static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1609  dec_img.d_w, dec_img.d_h, 16);
1610  aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1611  dec_img = dec_hbd_img;
1612  }
1613  }
1614 #endif
1615 
1616  if (!aom_compare_img(&enc_img, &dec_img)) {
1617  int y[4], u[4], v[4];
1618 #if CONFIG_AV1_HIGHBITDEPTH
1619  if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1620  aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1621  } else {
1622  aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1623  }
1624 #else
1625  aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1626 #endif
1627  fprintf(stderr,
1628  "Encode/decode mismatch on frame %d at"
1629  " Y[%d, %d] {%d/%d},"
1630  " U[%d, %d] {%d/%d},"
1631  " V[%d, %d] {%d/%d}\n",
1632  frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1633  v[1], v[2], v[3]);
1634  mismatch = 1;
1635  }
1636 
1637  aom_img_free(&enc_img);
1638  aom_img_free(&dec_img);
1639  return mismatch;
1640 }
1641 #endif // CONFIG_AV1_DECODER
1642 
// Running PSNR accumulators for the encoded stream.
// Every array has two slots; slot 1 is reserved for high bitdepth.
struct psnr_stats {
  // Sum of the squared-error values reported by PSNR packets.
  uint64_t psnr_sse_total[2];
  // Sum of the sample counts reported by PSNR packets.
  uint64_t psnr_samples_total[2];
  // Per-slot sums of the four PSNR values carried by each PSNR packet.
  double psnr_totals[2][4];
  // Number of PSNR packets accumulated.
  int psnr_count[2];
};
1650 
1651 static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1652  double ovpsnr;
1653 
1654  if (!psnr_stream->psnr_count[0]) return;
1655 
1656  fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1657  ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1658  (double)psnr_stream->psnr_sse_total[0]);
1659  fprintf(stderr, " %.3f", ovpsnr);
1660 
1661  for (int i = 0; i < 4; i++) {
1662  fprintf(stderr, " %.3f",
1663  psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1664  }
1665  fprintf(stderr, "\n");
1666 }
1667 
1668 static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1669  const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1670  aom::AV1RateControlRtcConfig rc_cfg;
1671  rc_cfg.width = cfg.g_w;
1672  rc_cfg.height = cfg.g_h;
1673  rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1674  rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1675  rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1676  rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1677  rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1678  rc_cfg.buf_sz = cfg.rc_buf_sz;
1679  rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1680  rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1681  // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1682  rc_cfg.max_intra_bitrate_pct = 300;
1683  rc_cfg.framerate = cfg.g_timebase.den;
1684  // TODO(jianj): Add suppor for SVC.
1685  rc_cfg.ss_number_layers = 1;
1686  rc_cfg.ts_number_layers = 1;
1687  rc_cfg.scaling_factor_num[0] = 1;
1688  rc_cfg.scaling_factor_den[0] = 1;
1689  rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1690  rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1691  rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1692  rc_cfg.aq_mode = app_input.aq_mode;
1693 
1694  return rc_cfg;
1695 }
1696 
// Maps an internal qindex to the external 0-63 quantizer scale: returns the
// smallest quantizer whose table entry is at least |qindex|, or 63 when
// |qindex| is above every table entry.
static int qindex_to_quantizer(int qindex) {
  // Entry q holds the 0-255 range qindex that corresponds to the 0-63 range
  // quantizer value q.
  static const int quantizer_to_qindex[] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int last_quantizer =
      (int)(sizeof(quantizer_to_qindex) / sizeof(quantizer_to_qindex[0])) - 1;

  // Walk forward until an entry reaches |qindex|; the last quantizer is the
  // answer whenever no earlier entry does.
  int candidate = 0;
  while (candidate < last_quantizer &&
         quantizer_to_qindex[candidate] < qindex) {
    ++candidate;
  }
  return candidate;
}
1712 
1713 static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1714  aom_codec_ctx_t *codec, int frame_cnt) {
1715  aom_active_map_t map = { 0, 0, 0 };
1716 
1717  map.rows = (cfg->g_h + 15) / 16;
1718  map.cols = (cfg->g_w + 15) / 16;
1719 
1720  map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1721  if (!map.active_map) die("Failed to allocate active map");
1722 
1723  // Example map for testing.
1724  for (unsigned int i = 0; i < map.rows; ++i) {
1725  for (unsigned int j = 0; j < map.cols; ++j) {
1726  int index = map.cols * i + j;
1727  map.active_map[index] = 1;
1728  if (frame_cnt < 300) {
1729  if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1730  } else if (frame_cnt >= 300) {
1731  if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1732  }
1733  }
1734  }
1735 
1736  if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1737  die_codec(codec, "Failed to set active map");
1738 
1739  free(map.active_map);
1740 }
1741 
// Entry point of the example encoder. In order, it:
//   1. sets default input/encoder settings and parses the command line,
//   2. validates the layer counts and the total target bitrate,
//   3. opens one output per (spatial, temporal) layer plus a combined output,
//   4. initializes the encoder (and optionally a decoder and an external rate
//      controller),
//   5. encodes every input frame once per spatial layer, writing packets and
//      accumulating rate-control, timing and PSNR statistics,
//   6. prints the statistics and releases the codec, writers and image.
// Returns EXIT_SUCCESS; failures are reported through die()/die_codec().
//
// NOTE(review): several statements in this listing are truncated (argument
// lists appear with no visible call opening, and `res` is used with no visible
// declaration) -- compare against the upstream file before editing.
1742 int main(int argc, const char **argv) {
1743  AppInput app_input;
1744  AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1745  FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1746  AvxVideoWriter *total_layer_file = NULL;
1747  FILE *total_layer_obu_file = NULL;
1748  aom_codec_enc_cfg_t cfg;
1749  int frame_cnt = 0;
1750  aom_image_t raw;
1751  int frame_avail;
1752  int got_data = 0;
1753  int flags = 0;
1754  int i;
1755  int pts = 0; // PTS starts at 0.
1756  int frame_duration = 1; // 1 timebase tick per frame.
1757  aom_svc_layer_id_t layer_id;
1758  aom_svc_params_t svc_params;
1759  aom_svc_ref_frame_config_t ref_frame_config;
1760  aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1761 
1762 #if CONFIG_INTERNAL_STATS
1763  FILE *stats_file = fopen("opsnr.stt", "a");
1764  if (stats_file == NULL) {
1765  die("Cannot open opsnr.stt\n");
1766  }
1767 #endif
1768 #if CONFIG_AV1_DECODER
1769  aom_codec_ctx_t decoder;
1770 #endif
1771 
1772  struct RateControlMetrics rc;
1773  int64_t cx_time = 0;
1774  int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
1775  int frame_cnt_layer[AOM_MAX_LAYERS];
1776  double sum_bitrate = 0.0;
1777  double sum_bitrate2 = 0.0;
1778  double framerate = 30.0;
1779  int use_svc_control = 1;
1780  int set_err_resil_frame = 0;
1781  int test_changing_bitrate = 0;
1782  zero(rc.layer_target_bitrate);
1783  memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1784  memset(&app_input, 0, sizeof(AppInput));
1785  memset(&svc_params, 0, sizeof(svc_params));
1786 
1787  // Flag to test dynamic scaling of source frames for single
1788  // spatial stream, using the scaling_mode control.
1789  const int test_dynamic_scaling_single_layer = 0;
1790 
1791  // Flag to test setting speed per layer.
1792  const int test_speed_per_layer = 0;
1793 
1794  // Flag for testing active maps.
1795  const int test_active_maps = 0;
1796 
1797  /* Setup default input stream settings */
1798  for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
1799  app_input.input_ctx[i].framerate.numerator = 30;
1800  app_input.input_ctx[i].framerate.denominator = 1;
1801  app_input.input_ctx[i].only_i420 = 0;
1802  app_input.input_ctx[i].bit_depth = AOM_BITS_8;
1803  }
1804  app_input.speed = 7;
1805  exec_name = argv[0];
1806 
1807  // start with default encoder configuration
1810  if (res != AOM_CODEC_OK) {
1811  die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1812  }
1813 
1814  // Real time parameters.
1816 
1817  cfg.rc_end_usage = AOM_CBR;
1818  cfg.rc_min_quantizer = 2;
1819  cfg.rc_max_quantizer = 52;
1820  cfg.rc_undershoot_pct = 50;
1821  cfg.rc_overshoot_pct = 50;
1822  cfg.rc_buf_initial_sz = 600;
1823  cfg.rc_buf_optimal_sz = 600;
1824  cfg.rc_buf_sz = 1000;
1825  cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
1826  cfg.g_lag_in_frames = 0;
1827  cfg.kf_mode = AOM_KF_AUTO;
1828  cfg.g_w = 0; // Force user to specify width and height for raw input.
1829  cfg.g_h = 0;
1830 
1831  parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1832 
1833  int ts_number_layers = svc_params.number_temporal_layers;
1834  int ss_number_layers = svc_params.number_spatial_layers;
1835 
1836  unsigned int width = cfg.g_w;
1837  unsigned int height = cfg.g_h;
1838 
  // A non-negative layering mode must agree with the layer counts.
1839  if (app_input.layering_mode >= 0) {
1840  if (ts_number_layers !=
1841  mode_to_num_temporal_layers[app_input.layering_mode] ||
1842  ss_number_layers !=
1843  mode_to_num_spatial_layers[app_input.layering_mode]) {
1844  die("Number of layers doesn't match layering mode.");
1845  }
1846  }
1847 
  // NOTE(review): input_ctx is declared with MAX_NUM_SPATIAL_LAYERS entries
  // but this loop is bounded by AOM_MAX_LAYERS; if that constant is larger
  // and the first MAX_NUM_SPATIAL_LAYERS inputs are all Y4M, the loop would
  // index past the array -- confirm.
1848  bool has_non_y4m_input = false;
1849  for (i = 0; i < AOM_MAX_LAYERS; ++i) {
1850  if (app_input.input_ctx[i].file_type != FILE_TYPE_Y4M) {
1851  has_non_y4m_input = true;
1852  break;
1853  }
1854  }
1855  // Y4M reader has its own allocation.
1856  if (has_non_y4m_input) {
1857  if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1858  die("Failed to allocate image (%dx%d)", width, height);
1859  }
1860  }
1861 
1862  aom_codec_iface_t *encoder = aom_codec_av1_cx();
1863 
1864  memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1865  sizeof(svc_params.layer_target_bitrate));
1866 
  // Sum the target bitrate of the top temporal layer of every spatial layer;
  // the sum must equal the configured total target bitrate.
1867  unsigned int total_rate = 0;
1868  for (i = 0; i < ss_number_layers; i++) {
1869  total_rate +=
1870  svc_params
1871  .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1872  }
1873  if (total_rate != cfg.rc_target_bitrate) {
1874  die("Incorrect total target bitrate, expected: %d", total_rate);
1875  }
1876 
1877  svc_params.framerate_factor[0] = 1;
1878  if (ts_number_layers == 2) {
1879  svc_params.framerate_factor[0] = 2;
1880  svc_params.framerate_factor[1] = 1;
1881  } else if (ts_number_layers == 3) {
1882  svc_params.framerate_factor[0] = 4;
1883  svc_params.framerate_factor[1] = 2;
1884  svc_params.framerate_factor[2] = 1;
1885  }
1886 
1887  libaom_examples::MultilayerMetadata multilayer_metadata;
1888  if (app_input.multilayer_metadata_file != NULL) {
1889  if (!libaom_examples::parse_multilayer_file(
1890  app_input.multilayer_metadata_file, &multilayer_metadata)) {
1891  die("Failed to parse multilayer metadata");
1892  }
1893  libaom_examples::print_multilayer_metadata(multilayer_metadata);
1894  }
1895 
  // NOTE(review): if den and num are both integers this division truncates
  // (and divides by zero when num is 0) -- confirm the field types.
1896  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1897  set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1898 
1899  AvxVideoInfo info;
1900  info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1901  info.frame_width = cfg.g_w;
1902  info.frame_height = cfg.g_h;
1903  info.time_base.numerator = cfg.g_timebase.num;
1904  info.time_base.denominator = cfg.g_timebase.den;
1905  // Open an output file for each stream.
1906  for (int sl = 0; sl < ss_number_layers; ++sl) {
1907  for (int tl = 0; tl < ts_number_layers; ++tl) {
1908  i = sl * ts_number_layers + tl;
1909  char file_name[PATH_MAX];
1910  snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1911  app_input.output_filename, i);
1912  if (app_input.output_obu) {
1913  obu_files[i] = fopen(file_name, "wb");
1914  if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1915  } else {
1916  outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1917  if (!outfile[i]) die("Failed to open %s for writing", file_name);
1918  }
1919  }
1920  }
  // The combined stream is written to the output filename itself.
1921  if (app_input.output_obu) {
1922  total_layer_obu_file = fopen(app_input.output_filename, "wb");
1923  if (!total_layer_obu_file)
1924  die("Failed to open %s for writing", app_input.output_filename);
1925  } else {
1926  total_layer_file =
1927  aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1928  if (!total_layer_file)
1929  die("Failed to open %s for writing", app_input.output_filename);
1930  }
1931 
1932  // Initialize codec.
1933  aom_codec_ctx_t codec;
1934  aom_codec_flags_t flag = 0;
1936  flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1937  if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1938  die_codec(&codec, "Failed to initialize encoder");
1939 
1940 #if CONFIG_AV1_DECODER
1941  if (app_input.decode) {
1942  if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1943  die_codec(&decoder, "Failed to initialize decoder");
1944  }
1945 #endif
1946 
1947  aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1948  aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1963 
1964  // Settings to reduce key frame encoding time.
1970 
1972 
1973  aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1974  if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1976  // INTRABC is currently disabled for rt mode, as it's too slow.
1978  }
1979 
1980  if (app_input.use_external_rc) {
1982  }
1983 
1985 
1988 
1990 
1991  svc_params.number_spatial_layers = ss_number_layers;
1992  svc_params.number_temporal_layers = ts_number_layers;
1993  for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1994  svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1995  svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1996  }
  // Default scale factors: 1/1 for every layer, then 1/2 for the base of a
  // two-layer stack, or 1/4 and 1/2 for the lower two of a three-layer stack.
1997  if (!app_input.scale_factors_explicitly_set) {
1998  for (i = 0; i < ss_number_layers; ++i) {
1999  svc_params.scaling_factor_num[i] = 1;
2000  svc_params.scaling_factor_den[i] = 1;
2001  }
2002  if (ss_number_layers == 2) {
2003  svc_params.scaling_factor_num[0] = 1;
2004  svc_params.scaling_factor_den[0] = 2;
2005  } else if (ss_number_layers == 3) {
2006  svc_params.scaling_factor_num[0] = 1;
2007  svc_params.scaling_factor_den[0] = 4;
2008  svc_params.scaling_factor_num[1] = 1;
2009  svc_params.scaling_factor_den[1] = 2;
2010  }
2011  }
2012  aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
2013  // TODO(aomedia:3032): Configure KSVC in fixed mode.
2014 
2015  // This controls the maximum target size of the key frame.
2016  // For generating smaller key frames, use a smaller max_intra_size_pct
2017  // value, like 100 or 200.
2018  {
2019  const int max_intra_size_pct = 300;
2021  max_intra_size_pct);
2022  }
2023 
2024  for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
2025  cx_time_layer[lx] = 0;
2026  frame_cnt_layer[lx] = 0;
2027  }
2028 
  // rc_api stays empty unless the external rate controller was requested;
  // the encode loop tests it before each use.
2029  std::unique_ptr<aom::AV1RateControlRTC> rc_api;
2030  if (app_input.use_external_rc) {
2031  const aom::AV1RateControlRtcConfig rc_cfg =
2032  create_rtc_rc_config(cfg, app_input);
2033  rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
2034  }
2035 
2036  frame_avail = 1;
2037  struct psnr_stats psnr_stream;
2038  memset(&psnr_stream, 0, sizeof(psnr_stream));
  // Keep going while input frames remain or the last encode call still
  // produced a frame packet.
2039  while (frame_avail || got_data) {
2040  struct aom_usec_timer timer;
2041  frame_avail = read_frame(&(app_input.input_ctx[0]), &raw);
2042  // Loop over spatial layers.
2043  for (int slx = 0; slx < ss_number_layers; slx++) {
2044  if (slx > 0 && app_input.input_ctx[slx].filename != NULL) {
2045  const int previous_layer_frame_avail = frame_avail;
2046  frame_avail = read_frame(&(app_input.input_ctx[slx]), &raw);
2047  if (previous_layer_frame_avail != frame_avail) {
2048  die("Mismatch in number of frames between spatial layer input files");
2049  }
2050  }
2051 
2052  aom_codec_iter_t iter = NULL;
2053  const aom_codec_cx_pkt_t *pkt;
2054  int layer = 0;
2055  // Flag for superframe whose base is key.
  // NOTE(review): this modulo is undefined if cfg.kf_max_dist is 0 -- confirm
  // that a zero value cannot reach this point.
2056  int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
2057  // For flexible mode:
2058  if (app_input.layering_mode >= 0) {
2059  // Set the reference/update flags, layer_id, and reference_map
2060  // buffer index.
2061  set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
2062  &ref_frame_config, &ref_frame_comp_pred,
2063  &use_svc_control, slx, is_key_frame,
2064  (app_input.layering_mode == 10), app_input.speed);
2065  aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2066  if (use_svc_control) {
2068  &ref_frame_config);
2070  &ref_frame_comp_pred);
2071  }
2072  if (app_input.multilayer_metadata_file != NULL) {
2073  add_multilayer_metadata(&raw, multilayer_metadata);
2074  }
2075  // Set the speed per layer.
2076  if (test_speed_per_layer) {
2077  int speed_per_layer = 10;
2078  if (layer_id.spatial_layer_id == 0) {
2079  if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
2080  if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
2081  if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
2082  } else if (layer_id.spatial_layer_id == 1) {
2083  if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
2084  if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
2085  if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
2086  } else if (layer_id.spatial_layer_id == 2) {
2087  if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
2088  if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
2089  if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
2090  }
2091  aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
2092  }
2093  } else {
2094  // Only up to 3 temporal layers supported in fixed mode.
2095  // Only need to set spatial and temporal layer_id: reference
2096  // prediction, refresh, and buffer_idx are set internally.
2097  layer_id.spatial_layer_id = slx;
2098  layer_id.temporal_layer_id = 0;
2099  if (ts_number_layers == 2) {
2100  layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
2101  } else if (ts_number_layers == 3) {
2102  if (frame_cnt % 2 != 0)
2103  layer_id.temporal_layer_id = 2;
2104  else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
2105  layer_id.temporal_layer_id = 1;
2106  }
2107  aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2108  }
2109 
2110  if (set_err_resil_frame && cfg.g_error_resilient == 0) {
2111  // Set error_resilient per frame: off/0 for base layer and
2112  // on/1 for enhancement layer frames.
2113  // Note that this can only be done on the fly/per-frame/layer
2114  // if the config error_resilience is off/0. See the logic for updating
2115  // in set_encoder_config():
2116  // tool_cfg->error_resilient_mode =
2117  // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
2118  const int err_resil_mode =
2119  layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
2121  err_resil_mode);
2122  }
2123 
  // Flat index of the current (spatial, temporal) layer, used for the
  // per-layer counters below.
2124  layer = slx * ts_number_layers + layer_id.temporal_layer_id;
2125  if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
2126 
2127  if (test_dynamic_scaling_single_layer) {
2128  // Example to scale source down by 2x2, then 4x4, and then back up to
2129  // 2x2, and then back to original.
2130  int frame_2x2 = 200;
2131  int frame_4x4 = 400;
2132  int frame_2x2up = 600;
2133  int frame_orig = 800;
2134  if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
2135  // Scale source down by 2x2.
2136  struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2137  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2138  } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
2139  // Scale source down by 4x4.
2140  struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
2141  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2142  } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
2143  // Source back up to 2x2.
2144  struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2145  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2146  } else if (frame_cnt >= frame_orig) {
2147  // Source back up to original resolution (no scaling).
2148  struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
2149  aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2150  }
2151  if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
2152  frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
2153  // For dynamic resize testing on single layer: refresh all references
2154  // on the resized frame: this is to avoid decode error:
2155  // if resize goes down by >= 4x4 then libaom decoder will throw an
2156  // error that some reference (even though not used) is beyond the
2157  // limit size (must be smaller than 4x4).
2158  for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
2159  if (use_svc_control) {
2161  &ref_frame_config);
2163  &ref_frame_comp_pred);
2164  }
2165  }
2166  }
2167 
2168  // Change target_bitrate every other frame.
2169  if (test_changing_bitrate && frame_cnt % 2 == 0) {
2170  if (frame_cnt < 500)
2171  cfg.rc_target_bitrate += 10;
2172  else
2173  cfg.rc_target_bitrate -= 10;
2174  // Do big increase and decrease.
2175  if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
2176  if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
2177  if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
2178  // Call change_config, or bypass with new control.
2179  // res = aom_codec_enc_config_set(&codec, &cfg);
2181  cfg.rc_target_bitrate))
2182  die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
2183  }
2184 
  // With the external rate controller, ask it for this frame's QP and hand
  // the value, mapped through qindex_to_quantizer(), to the encoder.
2185  if (rc_api) {
2186  aom::AV1FrameParamsRTC frame_params;
2187  // TODO(jianj): Add support for SVC.
2188  frame_params.spatial_layer_id = 0;
2189  frame_params.temporal_layer_id = 0;
2190  frame_params.frame_type =
2191  is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
2192  rc_api->ComputeQP(frame_params);
2193  const int current_qp = rc_api->GetQP();
2195  qindex_to_quantizer(current_qp))) {
2196  die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
2197  }
2198  }
2199 
2200  if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
2201 
2202  // Do the layer encode.
  // Passing NULL instead of an image once the input is exhausted lets the
  // loop continue until no more frame packets are returned.
2203  aom_usec_timer_start(&timer);
2204  if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
2205  die_codec(&codec, "Failed to encode frame");
2206  aom_usec_timer_mark(&timer);
2207  cx_time += aom_usec_timer_elapsed(&timer);
2208  cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
2209  frame_cnt_layer[layer] += 1;
2210 
2211  // Get the high motion content flag.
2212  int content_flag = 0;
2214  &content_flag)) {
2215  die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
2216  }
2217 
2218  got_data = 0;
2219  // For simulcast (mode 11): write out each spatial layer to the file.
2220  int ss_layers_write = (app_input.layering_mode == 11)
2221  ? layer_id.spatial_layer_id + 1
2222  : ss_number_layers;
2223  while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
2224  switch (pkt->kind) {
  // The packet is appended to the output of the current layer and of every
  // higher spatial/temporal layer within the write range.
2226  for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
2227  ++sl) {
2228  for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
2229  ++tl) {
2230  int j = sl * ts_number_layers + tl;
2231  if (app_input.output_obu) {
2232  fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2233  obu_files[j]);
2234  } else {
2235  aom_video_writer_write_frame(
2236  outfile[j],
2237  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2238  pkt->data.frame.sz, pts);
2239  }
2240  if (sl == layer_id.spatial_layer_id)
2241  rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
2242  }
2243  }
2244  got_data = 1;
2245  // Write everything into the top layer.
2246  if (app_input.output_obu) {
2247  fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2248  total_layer_obu_file);
2249  } else {
2250  aom_video_writer_write_frame(
2251  total_layer_file,
2252  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2253  pkt->data.frame.sz, pts);
2254  }
2255  // Keep count of rate control stats per layer (for non-key).
2256  if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
2257  int j = layer_id.spatial_layer_id * ts_number_layers +
2258  layer_id.temporal_layer_id;
2259  assert(j >= 0);
2260  rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
2261  rc.layer_avg_rate_mismatch[j] +=
2262  fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
2263  rc.layer_pfb[j];
2264  if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
2265  }
2266 
2267  if (rc_api) {
2268  rc_api->PostEncodeUpdate(pkt->data.frame.sz);
2269  }
2270  // Update for short-time encoding bitrate states, for moving window
2271  // of size rc->window, shifted by rc->window / 2.
2272  // Ignore first window segment, due to key frame.
2273  // For spatial layers: only do this for top/highest SL.
2274  if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
2275  sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2276  rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
2277  if (frame_cnt % rc.window_size == 0) {
2278  rc.window_count += 1;
2279  rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
2280  rc.variance_st_encoding_bitrate +=
2281  (sum_bitrate / rc.window_size) *
2282  (sum_bitrate / rc.window_size);
2283  sum_bitrate = 0.0;
2284  }
2285  }
2286  // Second shifted window.
2287  if (frame_cnt > rc.window_size + rc.window_size / 2 &&
2288  slx == ss_number_layers - 1) {
2289  sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2290  if (frame_cnt > 2 * rc.window_size &&
2291  frame_cnt % rc.window_size == 0) {
2292  rc.window_count += 1;
2293  rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
2294  rc.variance_st_encoding_bitrate +=
2295  (sum_bitrate2 / rc.window_size) *
2296  (sum_bitrate2 / rc.window_size);
2297  sum_bitrate2 = 0.0;
2298  }
2299  }
2300 
2301 #if CONFIG_AV1_DECODER
2302  if (app_input.decode) {
2303  if (aom_codec_decode(
2304  &decoder,
2305  reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2306  pkt->data.frame.sz, NULL))
2307  die_codec(&decoder, "Failed to decode frame");
2308  }
2309 #endif
2310 
2311  break;
2312  case AOM_CODEC_PSNR_PKT:
2313  if (app_input.show_psnr) {
2314  psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2315  psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2316  for (int plane = 0; plane < 4; plane++) {
2317  psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2318  }
2319  psnr_stream.psnr_count[0]++;
2320  }
2321  break;
2322  default: break;
2323  }
2324  }
2325 #if CONFIG_AV1_DECODER
2326  if (got_data && app_input.decode) {
2327  // Don't look for mismatch on top spatial and top temporal layers as
2328  // they are non reference frames.
2329  if ((ss_number_layers > 1 || ts_number_layers > 1) &&
2330  !(layer_id.temporal_layer_id > 0 &&
2331  layer_id.temporal_layer_id == ts_number_layers - 1)) {
2332  if (test_decode(&codec, &decoder, frame_cnt)) {
2333 #if CONFIG_INTERNAL_STATS
2334  fprintf(stats_file, "First mismatch occurred in frame %d\n",
2335  frame_cnt);
2336  fclose(stats_file);
2337 #endif
2338  fatal("Mismatch seen");
2339  }
2340  }
2341  }
2342 #endif
2343  } // loop over spatial layers
2344  ++frame_cnt;
2345  pts += frame_duration;
2346  }
2347 
  // Close the inputs in order, stopping at the first one with no filename.
2348  for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
2349  if (app_input.input_ctx[i].filename == NULL) {
2350  break;
2351  }
2352  close_input_file(&(app_input.input_ctx[i]));
2353  }
2354  printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2355  ts_number_layers);
2356 
2357  printf("\n");
2358  for (int slx = 0; slx < ss_number_layers; slx++)
2359  for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2360  int lx = slx * ts_number_layers + tlx;
2361  printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2362  slx, tlx, frame_cnt_layer[lx],
2363  (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2364  1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2365  }
2366 
2367  printf("\n");
2368  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2369  frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2370  1000000 * (double)frame_cnt / (double)cx_time);
2371 
2372  if (app_input.show_psnr) {
2373  show_psnr(&psnr_stream, 255.0);
2374  }
2375 
2376  if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2377 
2378 #if CONFIG_AV1_DECODER
2379  if (app_input.decode) {
2380  if (aom_codec_destroy(&decoder))
2381  die_codec(&decoder, "Failed to destroy decoder");
2382  }
2383 #endif
2384 
2385 #if CONFIG_INTERNAL_STATS
2386  fprintf(stats_file, "No mismatch detected in recon buffers\n");
2387  fclose(stats_file);
2388 #endif
2389 
  // NOTE(review): only the video writers are closed here; obu_files[] and
  // total_layer_obu_file, opened with fopen() in OBU mode, have no matching
  // fclose() in this function -- confirm whether that is intended.
2390  // Try to rewrite the output file headers with the actual frame count.
2391  for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2392  aom_video_writer_close(outfile[i]);
2393  aom_video_writer_close(total_layer_file);
2394 
2395  if (has_non_y4m_input) {
2396  aom_img_free(&raw);
2397  }
2398  return EXIT_SUCCESS;
2399 }
Definition: aom_encoder.h:202
Codec control function to set max data rate for intra frames, unsigned int parameter.
Definition: aomcx.h:312
Operation completed without error.
Definition: aom_codec.h:157
int number_spatial_layers
Definition: aomcx.h:1737
unsigned int d_h
Definition: aom_image.h:198
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: aom_encoder.h:786
Codec control function to encode with CDEF, unsigned int parameter.
Definition: aomcx.h:677
unsigned int g_w
Width of the frame.
Definition: aom_encoder.h:426
Codec control to set the frame drop mode for SVC, unsigned int parameter. The valid values are consta...
Definition: aomcx.h:1549
Codec control to set the target bitrate in kilobits per second, unsigned int parameter. For 1 pass CBR mode, single layer encoding. This control replaces the call to aom_codec_enc_config_set(&codec, &cfg) when only the target bitrate is changed, and so is much cheaper as it bypasses a lot of unneeded code checks.
Definition: aomcx.h:1536
Codec control to set the maximum number of consecutive frame drops, in units of time (milliseconds)...
Definition: aomcx.h:1579
#define AOM_CODEC_CONTROL_TYPECHECKED(ctx, id, data)
aom_codec_control wrapper macro (adds type-checking, less flexible)
Definition: aom_codec.h:542
int layer_target_bitrate[32]
Definition: aomcx.h:1744
unsigned int rc_target_bitrate
Target data rate.
Definition: aom_encoder.h:644
unsigned char * active_map
Specify on (1) or off (0) for each 16x16 region within a frame.
Definition: aomcx.h:1637
Codec control function to turn on / off frame order hint (int parameter). Affects: joint compound mod...
Definition: aomcx.h:872
Describes the encoder algorithm interface to applications.
int spatial_layer_id
Definition: aomcx.h:1726
Definition: aom_image.h:143
#define aom_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_enc_init_ver()
Definition: aom_encoder.h:943
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition: aom_encoder.h:723
Encoder configuration structure.
Definition: aom_encoder.h:387
enum aom_kf_mode kf_mode
Keyframe placement mode.
Definition: aom_encoder.h:768
Control to use default tx type only for intra modes, int parameter.
Definition: aomcx.h:1212
#define AOM_CODEC_USE_PSNR
Initialization-time Feature Enabling.
Definition: aom_encoder.h:79
Definition: aomcx.h:1725
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, unsigned int usage)
Get the default configuration for a usage.
Definition: aom_encoder.h:187
Describes the aom image descriptor and associated operations.
Codec control function to set encoder scaling mode for the next frame to be coded, aom_scaling_mode_t* parameter.
Definition: aomcx.h:197
aom_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: aom_encoder.h:497
Provides definitions for using AOM or AV1 encoder algorithm within the aom Codec Interface.
Codec context structure.
Definition: aom_codec.h:315
Codec control function to turn on/off intra block copy mode, int parameter.
Definition: aomcx.h:1120
Codec control function to turn on / off warped motion usage at sequence level, int parameter...
Definition: aomcx.h:1045
#define AOM_IMG_FMT_HIGHBITDEPTH
Definition: aom_image.h:38
int min_quantizers[32]
Definition: aomcx.h:1740
Describes the decoder algorithm interface to applications.
int aom_img_add_metadata(aom_image_t *img, uint32_t type, const uint8_t *data, size_t sz, aom_metadata_insert_flags_t insert_flag)
Add metadata to image.
int refresh[8]
Definition: aomcx.h:1769
Image Descriptor.
Definition: aom_image.h:182
int reference[7]
Definition: aomcx.h:1766
double psnr[4]
Definition: aom_encoder.h:145
#define AOM_MAX_TS_LAYERS
Definition: aomcx.h:1722
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition: aom_encoder.h:681
Codec control function to set the reference frame config, aom_svc_ref_frame_config_t* parameter...
Definition: aomcx.h:1295
aom_image_t * aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
enum aom_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: aom_encoder.h:623
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data, size_t data_sz, void *user_priv)
Decode data.
unsigned int g_profile
Bitstream profile to use.
Definition: aom_encoder.h:417
const aom_codec_cx_pkt_t * aom_codec_get_cx_data(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter)
Encoded data iterator.
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags)
Encode a frame.
#define AOM_USAGE_REALTIME
usage parameter analogous to AV1 REALTIME mode.
Definition: aom_encoder.h:1016
Definition: aom_codec.h:336
const struct aom_codec_iface aom_codec_iface_t
Codec interface structure.
Definition: aom_codec.h:271
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition: aom_encoder.h:714
#define aom_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_dec_init_ver()
Definition: aom_decoder.h:129
int temporal_layer_id
Definition: aomcx.h:1727
Definition: aomcx.h:1773
const char * aom_codec_iface_name(aom_codec_iface_t *iface)
Return the name for a given interface.
struct aom_rational g_timebase
Stream timebase units.
Definition: aom_encoder.h:489
Definition: aom_encoder.h:113
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx)
Destroy a codec instance.
int scaling_factor_num[4]
Definition: aomcx.h:1741
const char * aom_codec_err_to_string(aom_codec_err_t err)
Convert error number to printable string.
unsigned int cols
Definition: aomcx.h:1639
Codec control function to turn on / off CFL uv intra mode usage, int parameter.
Definition: aomcx.h:1095
Memory operation failed.
Definition: aom_codec.h:163
enum aom_codec_cx_pkt_kind kind
Definition: aom_encoder.h:123
Codec control function to enable error_resilient_mode, int parameter.
Definition: aomcx.h:448
void aom_img_free(aom_image_t *img)
Close an image descriptor.
Codec control function to turn on / off global motion usage for a sequence, int parameter.
Definition: aomcx.h:1035
Codec control to set auto tiling, unsigned int parameter. Value of 1 means encoder will set number of...
Definition: aomcx.h:1557
Codec control function to set SVC parameters, aom_svc_params_t* parameter.
Definition: aomcx.h:1290
Codec control function to turn on / off filter intra usage at sequence level, int parameter...
Definition: aomcx.h:1066
struct aom_codec_cx_pkt::@1::@2 frame
Definition: aom_encoder.h:110
aom active region map
Definition: aomcx.h:1635
int use_comp_pred[3]
Definition: aomcx.h:1776
unsigned int rc_resize_mode
Mode for spatial resampling, if supported by the codec.
Definition: aom_encoder.h:549
#define AOM_MAX_LAYERS
Definition: aomcx.h:1720
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition: aom_encoder.h:668
Control to set frequency of the cost updates for intrabc motion vectors, unsigned int parameter...
Definition: aomcx.h:1366
int max_quantizers[32]
Definition: aomcx.h:1739
#define AOM_CODEC_USE_HIGHBITDEPTH
Definition: aom_encoder.h:80
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition: aom_encoder.h:705
Codec control to enable post encode frame drop for RTC encoding, int parameter.
Definition: aomcx.h:1573
Control to set frequency of the cost updates for motion vectors, unsigned int parameter.
Definition: aomcx.h:1263
int number_temporal_layers
Definition: aomcx.h:1738
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition: aom_encoder.h:690
Codec control function to turn on/off palette mode, int parameter.
Definition: aomcx.h:1116
Control to set frequency of the cost updates for mode, unsigned int parameter.
Definition: aomcx.h:1253
Codec control to control loop filter.
Definition: aomcx.h:1415
Codec control function to get a pointer to the new frame.
Definition: aom.h:70
Codec control function to set encoder internal speed settings, int parameter.
Definition: aomcx.h:220
Codec control function to enable RDO modulated by frame temporal dependency, unsigned int parameter...
Definition: aomcx.h:414
Codec control function to set the delta q mode, unsigned int parameter.
Definition: aomcx.h:1140
aom_codec_iface_t * aom_codec_av1_cx(void)
The interface to the AV1 encoder.
Codec control function to pass an Active map to encoder, aom_active_map_t* parameter.
Definition: aomcx.h:190
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: aom_encoder.h:777
const void * aom_codec_iter_t
Iterator.
Definition: aom_codec.h:305
Codec control function to set flag for rate control used by external encoders.
Definition: aomcx.h:1435
unsigned int rows
Definition: aomcx.h:1638
#define AOM_FRAME_IS_KEY
Definition: aom_codec.h:288
Definition: aom_image.h:166
long aom_codec_flags_t
Initialization-time Feature Enabling.
Definition: aom_codec.h:232
Definition: aomcx.h:1736
Codec control function to turn on / off smooth intra modes usage, int parameter.
Definition: aomcx.h:1077
enum aom_bit_depth aom_bit_depth_t
Bit depth for codec. This enumeration determines the bit depth of the codec.
Codec control function to set reference frame compound prediction. aom_svc_ref_frame_comp_pred_t* par...
Definition: aomcx.h:1400
Codec control to get the high motion content flag, used for screen content realtime (RTC) encoding...
Definition: aomcx.h:1564
unsigned int g_usage
Algorithm specific "usage" value.
Definition: aom_encoder.h:399
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition: aom_encoder.h:475
aom_codec_err_t
Algorithm return codes.
Definition: aom_codec.h:155
Control to set frequency of the cost updates for coefficients, unsigned int parameter.
Definition: aomcx.h:1243
enum aom_chroma_sample_position aom_chroma_sample_position_t
List of chroma sample positions.
aom image scaling mode
Definition: aomcx.h:1647
Codec control to set quantizer for the next frame, int parameter.
Definition: aomcx.h:1498
int scaling_factor_den[4]
Definition: aomcx.h:1742
int den
Definition: aom_encoder.h:166
Codec control function to set content type, aom_tune_content parameter.
Definition: aomcx.h:503
int framerate_factor[8]
Definition: aomcx.h:1746
Codec control function to set adaptive quantization mode, unsigned int parameter. ...
Definition: aomcx.h:474
Encoder output packet.
Definition: aom_encoder.h:122
Definition: aomcx.h:1782
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition: aom_encoder.h:658
Codec control function to predict with OBMC mode, unsigned int parameter.
Definition: aomcx.h:704
int num
Definition: aom_encoder.h:165
Boost percentage for Golden Frame in CBR mode, unsigned int parameter.
Definition: aomcx.h:345
Definition: aom_image.h:45
Definition: aom_codec.h:337
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: aom_encoder.h:518
Codec control function to set CDF update mode, unsigned int parameter.
Definition: aomcx.h:512
An application-supplied parameter is not valid.
Definition: aom_codec.h:200
Codec control function to turn on/off intra angle delta, int parameter.
Definition: aomcx.h:1124
unsigned int g_threads
Maximum number of threads to use.
Definition: aom_encoder.h:407
union aom_codec_cx_pkt::@1 data
aom_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition: aom_encoder.h:467
Definition: aomcx.h:1750
unsigned int d_w
Definition: aom_image.h:197
aom_codec_err_t aom_codec_control(aom_codec_ctx_t *ctx, int ctrl_id,...)
Algorithm Control.
unsigned int g_h
Height of the frame.
Definition: aom_encoder.h:435
aom_img_fmt_t fmt
Definition: aom_image.h:183
Codec control function to set the layer id, aom_svc_layer_id_t* parameter.
Definition: aomcx.h:1285
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition: aom_encoder.h:540
int ref_idx[7]
Definition: aomcx.h:1768