WebM Codec SDK
vp9_spatial_svc_encoder
1 /*
2  * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  * Use of this source code is governed by a BSD-style license
5  * that can be found in the LICENSE file in the root of the source
6  * tree. An additional intellectual property rights grant can be found
7  * in the file PATENTS. All contributing project authors may
8  * be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 /*
12  * This is an example demonstrating how to implement a multi-layer
13  * VP9 encoding scheme based on spatial scalability for video applications
14  * that benefit from a scalable bitstream.
15  */
16 
17 #include <math.h>
18 #include <stdarg.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 
23 #include "../args.h"
24 #include "../tools_common.h"
25 #include "../video_writer.h"
26 
27 #include "../vpx_ports/vpx_timer.h"
28 #include "./svc_context.h"
29 #include "vpx/vp8cx.h"
30 #include "vpx/vpx_encoder.h"
31 #include "../vpxstats.h"
32 #include "vp9/encoder/vp9_encoder.h"
33 #include "./y4minput.h"
34 
35 #define OUTPUT_RC_STATS 1
36 
37 #define SIMULCAST_MODE 0
38 
39 static const arg_def_t outputfile =
40  ARG_DEF("o", "output", 1, "Output filename");
41 static const arg_def_t skip_frames_arg =
42  ARG_DEF("s", "skip-frames", 1, "input frames to skip");
43 static const arg_def_t frames_arg =
44  ARG_DEF("f", "frames", 1, "number of frames to encode");
45 static const arg_def_t threads_arg =
46  ARG_DEF("th", "threads", 1, "number of threads to use");
47 #if OUTPUT_RC_STATS
48 static const arg_def_t output_rc_stats_arg =
49  ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
50 #endif
51 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
52 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
53 static const arg_def_t timebase_arg =
54  ARG_DEF("t", "timebase", 1, "timebase (num/den)");
55 static const arg_def_t bitrate_arg = ARG_DEF(
56  "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");
57 static const arg_def_t spatial_layers_arg =
58  ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
59 static const arg_def_t temporal_layers_arg =
60  ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
61 static const arg_def_t temporal_layering_mode_arg =
62  ARG_DEF("tlm", "temporal-layering-mode", 1,
63  "temporal layering scheme."
64  "VP9E_TEMPORAL_LAYERING_MODE");
65 static const arg_def_t kf_dist_arg =
66  ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
67 static const arg_def_t scale_factors_arg =
68  ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
69 static const arg_def_t min_q_arg =
70  ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
71 static const arg_def_t max_q_arg =
72  ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
73 static const arg_def_t min_bitrate_arg =
74  ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate");
75 static const arg_def_t max_bitrate_arg =
76  ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
77 static const arg_def_t lag_in_frame_arg =
78  ARG_DEF(NULL, "lag-in-frames", 1,
79  "Number of frame to input before "
80  "generating any outputs");
81 static const arg_def_t rc_end_usage_arg =
82  ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
83 static const arg_def_t speed_arg =
84  ARG_DEF("sp", "speed", 1, "speed configuration");
85 static const arg_def_t aqmode_arg =
86  ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
87 static const arg_def_t bitrates_arg =
88  ARG_DEF("bl", "bitrates", 1, "bitrates[sl * num_tl + tl]");
89 static const arg_def_t dropframe_thresh_arg =
90  ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
91 static const struct arg_enum_list tune_content_enum[] = {
92  { "default", VP9E_CONTENT_DEFAULT },
93  { "screen", VP9E_CONTENT_SCREEN },
94  { "film", VP9E_CONTENT_FILM },
95  { NULL, 0 }
96 };
97 
98 static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
99  NULL, "tune-content", 1, "Tune content type", tune_content_enum);
100 static const arg_def_t inter_layer_pred_arg = ARG_DEF(
101  NULL, "inter-layer-pred", 1, "0 - 3: On, Off, Key-frames, Constrained");
102 
103 #if CONFIG_VP9_HIGHBITDEPTH
104 static const struct arg_enum_list bitdepth_enum[] = {
105  { "8", VPX_BITS_8 }, { "10", VPX_BITS_10 }, { "12", VPX_BITS_12 }, { NULL, 0 }
106 };
107 
108 static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
109  "d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ", bitdepth_enum);
110 #endif // CONFIG_VP9_HIGHBITDEPTH
111 
112 static const arg_def_t *svc_args[] = { &frames_arg,
113  &outputfile,
114  &width_arg,
115  &height_arg,
116  &timebase_arg,
117  &bitrate_arg,
118  &skip_frames_arg,
119  &spatial_layers_arg,
120  &kf_dist_arg,
121  &scale_factors_arg,
122  &min_q_arg,
123  &max_q_arg,
124  &min_bitrate_arg,
125  &max_bitrate_arg,
126  &temporal_layers_arg,
127  &temporal_layering_mode_arg,
128  &lag_in_frame_arg,
129  &threads_arg,
130  &aqmode_arg,
131 #if OUTPUT_RC_STATS
132  &output_rc_stats_arg,
133 #endif
134 
135 #if CONFIG_VP9_HIGHBITDEPTH
136  &bitdepth_arg,
137 #endif
138  &speed_arg,
139  &rc_end_usage_arg,
140  &bitrates_arg,
141  &dropframe_thresh_arg,
142  &tune_content_arg,
143  &inter_layer_pred_arg,
144  NULL };
145 
// Application defaults applied by parse_command_line() before any option on
// the command line is processed.

// Input handling.
static const uint32_t default_frames_to_skip = 0;
static const uint32_t default_frames_to_code = 60 * 60;

// Source geometry and timing.
static const uint32_t default_width = 1920;
static const uint32_t default_height = 1080;
static const uint32_t default_timebase_num = 1;
static const uint32_t default_timebase_den = 60;

// Rate and layer structure.
static const uint32_t default_bitrate = 1000;
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_temporal_layers = 1;
static const uint32_t default_kf_dist = 100;
static const uint32_t default_temporal_layering_mode = 0;
static const uint32_t default_output_rc_stats = 0;

// A speed of -1 leaves the library default in place.
static const int32_t default_speed = -1;
// A thread count of zero leaves the library default in place.
static const uint32_t default_threads = 0;
160 
161 typedef struct {
162  const char *output_filename;
163  uint32_t frames_to_code;
164  uint32_t frames_to_skip;
165  struct VpxInputContext input_ctx;
166  stats_io_t rc_stats;
167  int tune_content;
168  int inter_layer_pred;
169 } AppInput;
170 
171 static const char *exec_name;
172 
173 void usage_exit(void) {
174  fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n",
175  exec_name);
176  fprintf(stderr, "Options:\n");
177  arg_show_usage(stderr, svc_args);
178  exit(EXIT_FAILURE);
179 }
180 
181 static void parse_command_line(int argc, const char **argv_,
182  AppInput *app_input, SvcContext *svc_ctx,
183  vpx_codec_enc_cfg_t *enc_cfg) {
184  struct arg arg;
185  char **argv = NULL;
186  char **argi = NULL;
187  char **argj = NULL;
188  vpx_codec_err_t res;
189  unsigned int min_bitrate = 0;
190  unsigned int max_bitrate = 0;
191  char string_options[1024] = { 0 };
192 
193  // initialize SvcContext with parameters that will be passed to vpx_svc_init
194  svc_ctx->log_level = SVC_LOG_DEBUG;
195  svc_ctx->spatial_layers = default_spatial_layers;
196  svc_ctx->temporal_layers = default_temporal_layers;
197  svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
198 #if OUTPUT_RC_STATS
199  svc_ctx->output_rc_stat = default_output_rc_stats;
200 #endif
201  svc_ctx->speed = default_speed;
202  svc_ctx->threads = default_threads;
203 
204  // start with default encoder configuration
205  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
206  if (res) {
207  die("Failed to get config: %s\n", vpx_codec_err_to_string(res));
208  }
209  // update enc_cfg with app default values
210  enc_cfg->g_w = default_width;
211  enc_cfg->g_h = default_height;
212  enc_cfg->g_timebase.num = default_timebase_num;
213  enc_cfg->g_timebase.den = default_timebase_den;
214  enc_cfg->rc_target_bitrate = default_bitrate;
215  enc_cfg->kf_min_dist = default_kf_dist;
216  enc_cfg->kf_max_dist = default_kf_dist;
217  enc_cfg->rc_end_usage = VPX_CQ;
218 
219  // initialize AppInput with default values
220  app_input->frames_to_code = default_frames_to_code;
221  app_input->frames_to_skip = default_frames_to_skip;
222 
223  // process command line options
224  argv = argv_dup(argc - 1, argv_ + 1);
225  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
226  arg.argv_step = 1;
227 
228  if (arg_match(&arg, &frames_arg, argi)) {
229  app_input->frames_to_code = arg_parse_uint(&arg);
230  } else if (arg_match(&arg, &outputfile, argi)) {
231  app_input->output_filename = arg.val;
232  } else if (arg_match(&arg, &width_arg, argi)) {
233  enc_cfg->g_w = arg_parse_uint(&arg);
234  } else if (arg_match(&arg, &height_arg, argi)) {
235  enc_cfg->g_h = arg_parse_uint(&arg);
236  } else if (arg_match(&arg, &timebase_arg, argi)) {
237  enc_cfg->g_timebase = arg_parse_rational(&arg);
238  } else if (arg_match(&arg, &bitrate_arg, argi)) {
239  enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
240  } else if (arg_match(&arg, &skip_frames_arg, argi)) {
241  app_input->frames_to_skip = arg_parse_uint(&arg);
242  } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
243  svc_ctx->spatial_layers = arg_parse_uint(&arg);
244  } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
245  svc_ctx->temporal_layers = arg_parse_uint(&arg);
246 #if OUTPUT_RC_STATS
247  } else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
248  svc_ctx->output_rc_stat = arg_parse_uint(&arg);
249 #endif
250  } else if (arg_match(&arg, &speed_arg, argi)) {
251  svc_ctx->speed = arg_parse_uint(&arg);
252  if (svc_ctx->speed > 9) {
253  warn("Mapping speed %d to speed 9.\n", svc_ctx->speed);
254  }
255  } else if (arg_match(&arg, &aqmode_arg, argi)) {
256  svc_ctx->aqmode = arg_parse_uint(&arg);
257  } else if (arg_match(&arg, &threads_arg, argi)) {
258  svc_ctx->threads = arg_parse_uint(&arg);
259  } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
260  svc_ctx->temporal_layering_mode = enc_cfg->temporal_layering_mode =
261  arg_parse_int(&arg);
262  if (svc_ctx->temporal_layering_mode) {
263  enc_cfg->g_error_resilient = 1;
264  }
265  } else if (arg_match(&arg, &kf_dist_arg, argi)) {
266  enc_cfg->kf_min_dist = arg_parse_uint(&arg);
267  enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
268  } else if (arg_match(&arg, &scale_factors_arg, argi)) {
269  strncat(string_options, " scale-factors=",
270  sizeof(string_options) - strlen(string_options) - 1);
271  strncat(string_options, arg.val,
272  sizeof(string_options) - strlen(string_options) - 1);
273  } else if (arg_match(&arg, &bitrates_arg, argi)) {
274  strncat(string_options, " bitrates=",
275  sizeof(string_options) - strlen(string_options) - 1);
276  strncat(string_options, arg.val,
277  sizeof(string_options) - strlen(string_options) - 1);
278  } else if (arg_match(&arg, &min_q_arg, argi)) {
279  strncat(string_options, " min-quantizers=",
280  sizeof(string_options) - strlen(string_options) - 1);
281  strncat(string_options, arg.val,
282  sizeof(string_options) - strlen(string_options) - 1);
283  } else if (arg_match(&arg, &max_q_arg, argi)) {
284  strncat(string_options, " max-quantizers=",
285  sizeof(string_options) - strlen(string_options) - 1);
286  strncat(string_options, arg.val,
287  sizeof(string_options) - strlen(string_options) - 1);
288  } else if (arg_match(&arg, &min_bitrate_arg, argi)) {
289  min_bitrate = arg_parse_uint(&arg);
290  } else if (arg_match(&arg, &max_bitrate_arg, argi)) {
291  max_bitrate = arg_parse_uint(&arg);
292  } else if (arg_match(&arg, &lag_in_frame_arg, argi)) {
293  enc_cfg->g_lag_in_frames = arg_parse_uint(&arg);
294  } else if (arg_match(&arg, &rc_end_usage_arg, argi)) {
295  enc_cfg->rc_end_usage = arg_parse_uint(&arg);
296 #if CONFIG_VP9_HIGHBITDEPTH
297  } else if (arg_match(&arg, &bitdepth_arg, argi)) {
298  enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg);
299  switch (enc_cfg->g_bit_depth) {
300  case VPX_BITS_8:
301  enc_cfg->g_input_bit_depth = 8;
302  enc_cfg->g_profile = 0;
303  break;
304  case VPX_BITS_10:
305  enc_cfg->g_input_bit_depth = 10;
306  enc_cfg->g_profile = 2;
307  break;
308  case VPX_BITS_12:
309  enc_cfg->g_input_bit_depth = 12;
310  enc_cfg->g_profile = 2;
311  break;
312  default:
313  die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
314  break;
315  }
316 #endif // CONFIG_VP9_HIGHBITDEPTH
317  } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
318  enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
319  } else if (arg_match(&arg, &tune_content_arg, argi)) {
320  app_input->tune_content = arg_parse_uint(&arg);
321  } else if (arg_match(&arg, &inter_layer_pred_arg, argi)) {
322  app_input->inter_layer_pred = arg_parse_uint(&arg);
323  } else {
324  ++argj;
325  }
326  }
327 
328  // There will be a space in front of the string options
329  if (strlen(string_options) > 0)
330  vpx_svc_set_options(svc_ctx, string_options + 1);
331 
332  enc_cfg->g_pass = VPX_RC_ONE_PASS;
333 
334  if (enc_cfg->rc_target_bitrate > 0) {
335  if (min_bitrate > 0) {
336  enc_cfg->rc_2pass_vbr_minsection_pct =
337  min_bitrate * 100 / enc_cfg->rc_target_bitrate;
338  }
339  if (max_bitrate > 0) {
340  enc_cfg->rc_2pass_vbr_maxsection_pct =
341  max_bitrate * 100 / enc_cfg->rc_target_bitrate;
342  }
343  }
344 
345  // Check for unrecognized options
346  for (argi = argv; *argi; ++argi)
347  if (argi[0][0] == '-' && strlen(argi[0]) > 1)
348  die("Error: Unrecognized option %s\n", *argi);
349 
350  if (argv[0] == NULL) {
351  usage_exit();
352  }
353  app_input->input_ctx.filename = argv[0];
354  free(argv);
355 
356  open_input_file(&app_input->input_ctx);
357  if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
358  enc_cfg->g_w = app_input->input_ctx.width;
359  enc_cfg->g_h = app_input->input_ctx.height;
360  }
361 
362  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
363  enc_cfg->g_h % 2)
364  die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
365 
366  printf(
367  "Codec %s\nframes: %d, skip: %d\n"
368  "layers: %d\n"
369  "width %d, height: %d,\n"
370  "num: %d, den: %d, bitrate: %d,\n"
371  "gop size: %d\n",
372  vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
373  app_input->frames_to_skip, svc_ctx->spatial_layers, enc_cfg->g_w,
374  enc_cfg->g_h, enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
375  enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
376 }
377 
378 #if OUTPUT_RC_STATS
379 // For rate control encoding stats.
380 struct RateControlStats {
381  // Number of input frames per layer.
382  int layer_input_frames[VPX_MAX_LAYERS];
383  // Total (cumulative) number of encoded frames per layer.
384  int layer_tot_enc_frames[VPX_MAX_LAYERS];
385  // Number of encoded non-key frames per layer.
386  int layer_enc_frames[VPX_MAX_LAYERS];
387  // Framerate per layer (cumulative).
388  double layer_framerate[VPX_MAX_LAYERS];
389  // Target average frame size per layer (per-frame-bandwidth per layer).
390  double layer_pfb[VPX_MAX_LAYERS];
391  // Actual average frame size per layer.
392  double layer_avg_frame_size[VPX_MAX_LAYERS];
393  // Average rate mismatch per layer (|target - actual| / target).
394  double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
395  // Actual encoding bitrate per layer (cumulative).
396  double layer_encoding_bitrate[VPX_MAX_LAYERS];
397  // Average of the short-time encoder actual bitrate.
398  // TODO(marpan): Should we add these short-time stats for each layer?
399  double avg_st_encoding_bitrate;
400  // Variance of the short-time encoder actual bitrate.
401  double variance_st_encoding_bitrate;
402  // Window (number of frames) for computing short-time encoding bitrate.
403  int window_size;
404  // Number of window measurements.
405  int window_count;
406 };
407 
408 // Note: these rate control stats assume only 1 key frame in the
409 // sequence (i.e., first frame only).
410 static void set_rate_control_stats(struct RateControlStats *rc,
411  vpx_codec_enc_cfg_t *cfg) {
412  unsigned int sl, tl;
413  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
414  // per-frame-bandwidth, for the rate control encoding stats below.
415  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
416 
417  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
418  for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
419  const int layer = sl * cfg->ts_number_layers + tl;
420  if (cfg->ts_number_layers == 1)
421  rc->layer_framerate[layer] = framerate;
422  else
423  rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl];
424  if (tl > 0) {
425  rc->layer_pfb[layer] =
426  1000.0 *
427  (cfg->layer_target_bitrate[layer] -
428  cfg->layer_target_bitrate[layer - 1]) /
429  (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
430  } else {
431  rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] /
432  rc->layer_framerate[layer];
433  }
434  rc->layer_input_frames[layer] = 0;
435  rc->layer_enc_frames[layer] = 0;
436  rc->layer_tot_enc_frames[layer] = 0;
437  rc->layer_encoding_bitrate[layer] = 0.0;
438  rc->layer_avg_frame_size[layer] = 0.0;
439  rc->layer_avg_rate_mismatch[layer] = 0.0;
440  }
441  }
442  rc->window_count = 0;
443  rc->window_size = 15;
444  rc->avg_st_encoding_bitrate = 0.0;
445  rc->variance_st_encoding_bitrate = 0.0;
446 }
447 
448 static void printout_rate_control_summary(struct RateControlStats *rc,
449  vpx_codec_enc_cfg_t *cfg,
450  int frame_cnt) {
451  unsigned int sl, tl;
452  double perc_fluctuation = 0.0;
453  int tot_num_frames = 0;
454  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
455  printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
457  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
458  tot_num_frames = 0;
459  for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
460  const int layer = sl * cfg->ts_number_layers + tl;
461  const int num_dropped =
462  (tl > 0)
463  ? (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer])
464  : (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] -
465  1);
466  tot_num_frames += rc->layer_input_frames[layer];
467  rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
468  rc->layer_encoding_bitrate[layer] /
469  tot_num_frames;
470  rc->layer_avg_frame_size[layer] =
471  rc->layer_avg_frame_size[layer] / rc->layer_enc_frames[layer];
472  rc->layer_avg_rate_mismatch[layer] = 100.0 *
473  rc->layer_avg_rate_mismatch[layer] /
474  rc->layer_enc_frames[layer];
475  printf("For layer#: sl%d tl%d \n", sl, tl);
476  printf("Bitrate (target vs actual): %d %f.0 kbps\n",
477  cfg->layer_target_bitrate[layer],
478  rc->layer_encoding_bitrate[layer]);
479  printf("Average frame size (target vs actual): %f %f bits\n",
480  rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
481  printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[layer]);
482  printf(
483  "Number of input frames, encoded (non-key) frames, "
484  "and percent dropped frames: %d %d %f.0 \n",
485  rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
486  100.0 * num_dropped / rc->layer_input_frames[layer]);
487  printf("\n");
488  }
489  }
490  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
491  rc->variance_st_encoding_bitrate =
492  rc->variance_st_encoding_bitrate / rc->window_count -
493  (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
494  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
495  rc->avg_st_encoding_bitrate;
496  printf("Short-time stats, for window of %d frames: \n", rc->window_size);
497  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
498  rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
499  perc_fluctuation);
500  printf("Num of input, num of encoded (super) frames: %d %d \n", frame_cnt,
501  tot_num_frames);
502 }
503 
504 static vpx_codec_err_t parse_superframe_index(const uint8_t *data,
505  size_t data_sz, uint64_t sizes[8],
506  int *count) {
507  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
508  // it is a super frame index. If the last byte of real video compression
509  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
510  // not the associated matching marker byte at the front of the index we have
511  // an invalid bitstream and need to return an error.
512 
513  uint8_t marker;
514 
515  marker = *(data + data_sz - 1);
516  *count = 0;
517 
518  if ((marker & 0xe0) == 0xc0) {
519  const uint32_t frames = (marker & 0x7) + 1;
520  const uint32_t mag = ((marker >> 3) & 0x3) + 1;
521  const size_t index_sz = 2 + mag * frames;
522 
523  // This chunk is marked as having a superframe index but doesn't have
524  // enough data for it, thus it's an invalid superframe index.
525  if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;
526 
527  {
528  const uint8_t marker2 = *(data + data_sz - index_sz);
529 
530  // This chunk is marked as having a superframe index but doesn't have
531  // the matching marker byte at the front of the index therefore it's an
532  // invalid chunk.
533  if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME;
534  }
535 
536  {
537  // Found a valid superframe index.
538  uint32_t i, j;
539  const uint8_t *x = &data[data_sz - index_sz + 1];
540 
541  for (i = 0; i < frames; ++i) {
542  uint32_t this_sz = 0;
543 
544  for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8);
545  sizes[i] = this_sz;
546  }
547  *count = frames;
548  }
549  }
550  return VPX_CODEC_OK;
551 }
552 #endif
553 
554 // Example pattern for spatial layers and 2 temporal layers used in the
555 // bypass/flexible mode. The pattern corresponds to the pattern
556 // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
557 // non-flexible mode.
558 static void set_frame_flags_bypass_mode_ex0(
559  int tl, int num_spatial_layers, int is_key_frame,
560  vpx_svc_ref_frame_config_t *ref_frame_config) {
561  int sl;
562  for (sl = 0; sl < num_spatial_layers; ++sl)
563  ref_frame_config->update_buffer_slot[sl] = 0;
564 
565  for (sl = 0; sl < num_spatial_layers; ++sl) {
566  // Set the buffer idx.
567  if (tl == 0) {
568  ref_frame_config->lst_fb_idx[sl] = sl;
569  if (sl) {
570  if (is_key_frame) {
571  ref_frame_config->lst_fb_idx[sl] = sl - 1;
572  ref_frame_config->gld_fb_idx[sl] = sl;
573  } else {
574  ref_frame_config->gld_fb_idx[sl] = sl - 1;
575  }
576  } else {
577  ref_frame_config->gld_fb_idx[sl] = 0;
578  }
579  ref_frame_config->alt_fb_idx[sl] = 0;
580  } else if (tl == 1) {
581  ref_frame_config->lst_fb_idx[sl] = sl;
582  ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1;
583  ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
584  }
585  // Set the reference and update flags.
586  if (!tl) {
587  if (!sl) {
588  // Base spatial and base temporal (sl = 0, tl = 0)
589  ref_frame_config->reference_last[sl] = 1;
590  ref_frame_config->reference_golden[sl] = 0;
591  ref_frame_config->reference_alt_ref[sl] = 0;
592  ref_frame_config->update_buffer_slot[sl] |=
593  1 << ref_frame_config->lst_fb_idx[sl];
594  } else {
595  if (is_key_frame) {
596  ref_frame_config->reference_last[sl] = 1;
597  ref_frame_config->reference_golden[sl] = 0;
598  ref_frame_config->reference_alt_ref[sl] = 0;
599  ref_frame_config->update_buffer_slot[sl] |=
600  1 << ref_frame_config->gld_fb_idx[sl];
601  } else {
602  // Non-zero spatiall layer.
603  ref_frame_config->reference_last[sl] = 1;
604  ref_frame_config->reference_golden[sl] = 1;
605  ref_frame_config->reference_alt_ref[sl] = 1;
606  ref_frame_config->update_buffer_slot[sl] |=
607  1 << ref_frame_config->lst_fb_idx[sl];
608  }
609  }
610  } else if (tl == 1) {
611  if (!sl) {
612  // Base spatial and top temporal (tl = 1)
613  ref_frame_config->reference_last[sl] = 1;
614  ref_frame_config->reference_golden[sl] = 0;
615  ref_frame_config->reference_alt_ref[sl] = 0;
616  ref_frame_config->update_buffer_slot[sl] |=
617  1 << ref_frame_config->alt_fb_idx[sl];
618  } else {
619  // Non-zero spatial.
620  if (sl < num_spatial_layers - 1) {
621  ref_frame_config->reference_last[sl] = 1;
622  ref_frame_config->reference_golden[sl] = 1;
623  ref_frame_config->reference_alt_ref[sl] = 0;
624  ref_frame_config->update_buffer_slot[sl] |=
625  1 << ref_frame_config->alt_fb_idx[sl];
626  } else if (sl == num_spatial_layers - 1) {
627  // Top spatial and top temporal (non-reference -- doesn't update any
628  // reference buffers)
629  ref_frame_config->reference_last[sl] = 1;
630  ref_frame_config->reference_golden[sl] = 1;
631  ref_frame_config->reference_alt_ref[sl] = 0;
632  }
633  }
634  }
635  }
636 }
637 
638 // Example pattern for 2 spatial layers and 2 temporal layers used in the
639 // bypass/flexible mode, except only 1 spatial layer when temporal_layer_id = 1.
640 static void set_frame_flags_bypass_mode_ex1(
641  int tl, int num_spatial_layers, int is_key_frame,
642  vpx_svc_ref_frame_config_t *ref_frame_config) {
643  int sl;
644  for (sl = 0; sl < num_spatial_layers; ++sl)
645  ref_frame_config->update_buffer_slot[sl] = 0;
646 
647  if (tl == 0) {
648  if (is_key_frame) {
649  ref_frame_config->lst_fb_idx[1] = 0;
650  ref_frame_config->gld_fb_idx[1] = 1;
651  } else {
652  ref_frame_config->lst_fb_idx[1] = 1;
653  ref_frame_config->gld_fb_idx[1] = 0;
654  }
655  ref_frame_config->alt_fb_idx[1] = 0;
656 
657  ref_frame_config->lst_fb_idx[0] = 0;
658  ref_frame_config->gld_fb_idx[0] = 0;
659  ref_frame_config->alt_fb_idx[0] = 0;
660  }
661  if (tl == 1) {
662  ref_frame_config->lst_fb_idx[0] = 0;
663  ref_frame_config->gld_fb_idx[0] = 1;
664  ref_frame_config->alt_fb_idx[0] = 2;
665 
666  ref_frame_config->lst_fb_idx[1] = 1;
667  ref_frame_config->gld_fb_idx[1] = 2;
668  ref_frame_config->alt_fb_idx[1] = 3;
669  }
670  // Set the reference and update flags.
671  if (tl == 0) {
672  // Base spatial and base temporal (sl = 0, tl = 0)
673  ref_frame_config->reference_last[0] = 1;
674  ref_frame_config->reference_golden[0] = 0;
675  ref_frame_config->reference_alt_ref[0] = 0;
676  ref_frame_config->update_buffer_slot[0] |=
677  1 << ref_frame_config->lst_fb_idx[0];
678 
679  if (is_key_frame) {
680  ref_frame_config->reference_last[1] = 1;
681  ref_frame_config->reference_golden[1] = 0;
682  ref_frame_config->reference_alt_ref[1] = 0;
683  ref_frame_config->update_buffer_slot[1] |=
684  1 << ref_frame_config->gld_fb_idx[1];
685  } else {
686  // Non-zero spatiall layer.
687  ref_frame_config->reference_last[1] = 1;
688  ref_frame_config->reference_golden[1] = 1;
689  ref_frame_config->reference_alt_ref[1] = 1;
690  ref_frame_config->update_buffer_slot[1] |=
691  1 << ref_frame_config->lst_fb_idx[1];
692  }
693  }
694  if (tl == 1) {
695  // Top spatial and top temporal (non-reference -- doesn't update any
696  // reference buffers)
697  ref_frame_config->reference_last[1] = 1;
698  ref_frame_config->reference_golden[1] = 0;
699  ref_frame_config->reference_alt_ref[1] = 0;
700  }
701 }
702 
#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
// Compares reference frame 0 of the encoder with reference frame 0 of the
// decoder. On the first difference it prints the mismatch location, sets
// decoder->err and stores `frames_out` in *mismatch_seen. Once *mismatch_seen
// is non-zero the function returns immediately without checking anything.
static void test_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder,
                        const int frames_out, int *mismatch_seen) {
  struct vp9_ref_frame enc_ref, dec_ref;
  vpx_image_t enc_img, dec_img;

  if (*mismatch_seen != 0) return;

  /* Fetch reference buffer 0 from both sides. */
  enc_ref.idx = 0;
  dec_ref.idx = 0;
  vpx_codec_control(encoder, VP9_GET_REFERENCE, &enc_ref);
  enc_img = enc_ref.img;
  vpx_codec_control(decoder, VP9_GET_REFERENCE, &dec_ref);
  dec_img = dec_ref.img;

#if CONFIG_VP9_HIGHBITDEPTH
  {
    const int enc_is_high = (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) != 0;
    const int dec_is_high = (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) != 0;

    // When only one side is high bit depth, compare against an 8-bit
    // truncated copy of that side.
    if (enc_is_high != dec_is_high) {
      if (enc_is_high) {
        vpx_img_alloc(&enc_img, enc_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH,
                      enc_img.d_w, enc_img.d_h, 16);
        vpx_img_truncate_16_to_8(&enc_img, &enc_ref.img);
      }
      if (dec_is_high) {
        vpx_img_alloc(&dec_img, dec_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH,
                      dec_img.d_w, dec_img.d_h, 16);
        vpx_img_truncate_16_to_8(&dec_img, &dec_ref.img);
      }
    }
  }
#endif

  {
    const int images_match = compare_img(&enc_img, &dec_img);

    if (!images_match) {
      int y[4], u[4], v[4];
#if CONFIG_VP9_HIGHBITDEPTH
      if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
        find_mismatch_high(&enc_img, &dec_img, y, u, v);
      } else {
        find_mismatch(&enc_img, &dec_img, y, u, v);
      }
#else
      find_mismatch(&enc_img, &dec_img, y, u, v);
#endif
      decoder->err = 1;
      printf(
          "Encode/decode mismatch on frame %d at"
          " Y[%d, %d] {%d/%d},"
          " U[%d, %d] {%d/%d},"
          " V[%d, %d] {%d/%d}\n",
          frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
          v[1], v[2], v[3]);
      *mismatch_seen = frames_out;
    }
  }

  vpx_img_free(&enc_img);
  vpx_img_free(&dec_img);
}
#endif
758 
#if OUTPUT_RC_STATS
// Handles one encoder output packet: writes the per-spatial-layer output
// streams and folds the packet into the rate control statistics.
//
//   codec     - encoder context, queried for the current SVC layer id.
//   enc_cfg   - encoder configuration (number of spatial/temporal layers).
//   layer_id  - receives the layer id reported by the encoder.
//   cx_pkt    - the compressed frame packet to process.
//   rc        - statistics to update.
//   outfile   - one writer per spatial layer.
//   frame_cnt - index of the current frame, used for the short-time windows.
//   framerate - frames per second, used to convert bits to kbps.
//
// The short-time window sums are kept in function-local static storage, so
// this function is intended for a single encode per process.
static void svc_output_rc_stats(
    vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *enc_cfg,
    vpx_svc_layer_id_t *layer_id, const vpx_codec_cx_pkt_t *cx_pkt,
    struct RateControlStats *rc, VpxVideoWriter **outfile,
    const uint32_t frame_cnt, const double framerate) {
  // Running totals for the two short-time windows. They have to persist from
  // one packet to the next: as automatic variables they were re-zeroed on
  // every call, so each "window" measurement only ever contained the current
  // packet. Each total is cleared after it has been folded into the stats.
  static double sum_bitrate = 0.0;
  static double sum_bitrate2 = 0.0;
  int num_layers_encoded = 0;
  unsigned int sl, tl;
  uint64_t sizes[8];
  uint64_t sizes_parsed[8];
  int count = 0;
  vp9_zero(sizes);
  vp9_zero(sizes_parsed);
  vpx_codec_control(codec, VP9E_GET_SVC_LAYER_ID, layer_id);
  parse_superframe_index(cx_pkt->data.frame.buf, cx_pkt->data.frame.sz,
                         sizes_parsed, &count);
  if (enc_cfg->ss_number_layers == 1) {
    sizes[0] = cx_pkt->data.frame.sz;
  } else {
    // The superframe index only lists layers that were actually encoded, so
    // map its entries back onto spatial layer numbers.
    for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
      sizes[sl] = 0;
      if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
        sizes[sl] = sizes_parsed[num_layers_encoded];
        num_layers_encoded++;
      }
    }
  }
  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
    unsigned int sl2;
    uint64_t tot_size = 0;
#if SIMULCAST_MODE
    // Each stream receives only its own layer: skip past the lower layers.
    for (sl2 = 0; sl2 < sl; ++sl2) {
      if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
    }
    vpx_video_writer_write_frame(outfile[sl],
                                 (uint8_t *)(cx_pkt->data.frame.buf) + tot_size,
                                 (size_t)(sizes[sl]), cx_pkt->data.frame.pts);
#else
    // Each stream receives its own layer plus every layer below it.
    for (sl2 = 0; sl2 <= sl; ++sl2) {
      if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
    }
    if (tot_size > 0)
      vpx_video_writer_write_frame(outfile[sl], cx_pkt->data.frame.buf,
                                   (size_t)(tot_size), cx_pkt->data.frame.pts);
#endif  // SIMULCAST_MODE
  }
  for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
    if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
      // A frame of temporal layer t also counts towards the cumulative
      // totals of every higher temporal layer.
      for (tl = layer_id->temporal_layer_id; tl < enc_cfg->ts_number_layers;
           ++tl) {
        const int layer = sl * enc_cfg->ts_number_layers + tl;
        ++rc->layer_tot_enc_frames[layer];
        rc->layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
        // Keep count of rate control stats per layer, for non-key
        // frames.
        if (tl == (unsigned int)layer_id->temporal_layer_id &&
            !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
          rc->layer_avg_frame_size[layer] += 8.0 * sizes[sl];
          rc->layer_avg_rate_mismatch[layer] +=
              fabs(8.0 * sizes[sl] - rc->layer_pfb[layer]) /
              rc->layer_pfb[layer];
          ++rc->layer_enc_frames[layer];
        }
      }
    }
  }

  // Update for short-time encoding bitrate states, for moving
  // window of size rc->window, shifted by rc->window / 2.
  // Ignore first window segment, due to key frame.
  if (frame_cnt > (unsigned int)rc->window_size) {
    for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
      if (cx_pkt->data.frame.spatial_layer_encoded[sl])
        sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
    }
    if (frame_cnt % rc->window_size == 0) {
      rc->window_count += 1;
      rc->avg_st_encoding_bitrate += sum_bitrate / rc->window_size;
      rc->variance_st_encoding_bitrate +=
          (sum_bitrate / rc->window_size) * (sum_bitrate / rc->window_size);
      sum_bitrate = 0.0;
    }
  }

  // Second shifted window.
  if (frame_cnt > (unsigned int)(rc->window_size + rc->window_size / 2)) {
    for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
      sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
    }

    if (frame_cnt > (unsigned int)(2 * rc->window_size) &&
        frame_cnt % rc->window_size == 0) {
      rc->window_count += 1;
      rc->avg_st_encoding_bitrate += sum_bitrate2 / rc->window_size;
      rc->variance_st_encoding_bitrate +=
          (sum_bitrate2 / rc->window_size) * (sum_bitrate2 / rc->window_size);
      sum_bitrate2 = 0.0;
    }
  }
}
#endif
860 
861 int main(int argc, const char **argv) {
862  AppInput app_input;
863  VpxVideoWriter *writer = NULL;
864  VpxVideoInfo info;
865  vpx_codec_ctx_t encoder;
866  vpx_codec_enc_cfg_t enc_cfg;
867  SvcContext svc_ctx;
868  vpx_svc_frame_drop_t svc_drop_frame;
869  uint32_t i;
870  uint32_t frame_cnt = 0;
871  vpx_image_t raw;
872  vpx_codec_err_t res;
873  int pts = 0; /* PTS starts at 0 */
874  int frame_duration = 1; /* 1 timebase tick per frame */
875  int end_of_stream = 0;
876  int frames_received = 0;
877 #if OUTPUT_RC_STATS
878  VpxVideoWriter *outfile[VPX_SS_MAX_LAYERS] = { NULL };
879  struct RateControlStats rc;
880  vpx_svc_layer_id_t layer_id;
881  vpx_svc_ref_frame_config_t ref_frame_config;
882  unsigned int sl;
883  double framerate = 30.0;
884 #endif
885  struct vpx_usec_timer timer;
886  int64_t cx_time = 0;
887 #if CONFIG_INTERNAL_STATS
888  FILE *f = fopen("opsnr.stt", "a");
889 #endif
890 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
891  int mismatch_seen = 0;
892  vpx_codec_ctx_t decoder;
893 #endif
894  memset(&svc_ctx, 0, sizeof(svc_ctx));
895  memset(&app_input, 0, sizeof(AppInput));
896  memset(&info, 0, sizeof(VpxVideoInfo));
897  memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t));
898  memset(&rc, 0, sizeof(struct RateControlStats));
899  exec_name = argv[0];
900 
901  /* Setup default input stream settings */
902  app_input.input_ctx.framerate.numerator = 30;
903  app_input.input_ctx.framerate.denominator = 1;
904  app_input.input_ctx.only_i420 = 1;
905  app_input.input_ctx.bit_depth = 0;
906 
907  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
908 
909  // Y4M reader handles its own allocation.
910  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
911 // Allocate image buffer
912 #if CONFIG_VP9_HIGHBITDEPTH
913  if (!vpx_img_alloc(&raw,
914  enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420
915  : VPX_IMG_FMT_I42016,
916  enc_cfg.g_w, enc_cfg.g_h, 32)) {
917  die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
918  }
919 #else
920  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) {
921  die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
922  }
923 #endif // CONFIG_VP9_HIGHBITDEPTH
924  }
925 
926  // Initialize codec
927  if (vpx_svc_init(&svc_ctx, &encoder, vpx_codec_vp9_cx(), &enc_cfg) !=
928  VPX_CODEC_OK)
929  die("Failed to initialize encoder\n");
930 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
931  if (vpx_codec_dec_init(
932  &decoder, get_vpx_decoder_by_name("vp9")->codec_interface(), NULL, 0))
933  die("Failed to initialize decoder\n");
934 #endif
935 
936 #if OUTPUT_RC_STATS
937  rc.window_count = 1;
938  rc.window_size = 15; // Silence a static analysis warning.
939  rc.avg_st_encoding_bitrate = 0.0;
940  rc.variance_st_encoding_bitrate = 0.0;
941  if (svc_ctx.output_rc_stat) {
942  set_rate_control_stats(&rc, &enc_cfg);
943  framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
944  }
945 #endif
946 
947  info.codec_fourcc = VP9_FOURCC;
948  info.frame_width = enc_cfg.g_w;
949  info.frame_height = enc_cfg.g_h;
950  info.time_base.numerator = enc_cfg.g_timebase.num;
951  info.time_base.denominator = enc_cfg.g_timebase.den;
952 
953  writer =
954  vpx_video_writer_open(app_input.output_filename, kContainerIVF, &info);
955  if (!writer)
956  die("Failed to open %s for writing\n", app_input.output_filename);
957 
958 #if OUTPUT_RC_STATS
959  // Write out spatial layer stream.
960  // TODO(marpan/jianj): allow for writing each spatial and temporal stream.
961  if (svc_ctx.output_rc_stat) {
962  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
963  char file_name[PATH_MAX];
964 
965  snprintf(file_name, sizeof(file_name), "%s_s%d.ivf",
966  app_input.output_filename, sl);
967  outfile[sl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
968  if (!outfile[sl]) die("Failed to open %s for writing", file_name);
969  }
970  }
971 #endif
972 
973  // skip initial frames
974  for (i = 0; i < app_input.frames_to_skip; ++i)
975  read_frame(&app_input.input_ctx, &raw);
976 
977  if (svc_ctx.speed != -1)
978  vpx_codec_control(&encoder, VP8E_SET_CPUUSED, svc_ctx.speed);
979  if (svc_ctx.threads) {
981  get_msb(svc_ctx.threads));
982  if (svc_ctx.threads > 1)
983  vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 1);
984  else
985  vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 0);
986  }
987  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
988  vpx_codec_control(&encoder, VP9E_SET_AQ_MODE, 3);
989  if (svc_ctx.speed >= 5)
992 
994  app_input.inter_layer_pred);
995 
997 
998  vpx_codec_control(&encoder, VP9E_SET_TUNE_CONTENT, app_input.tune_content);
999 
1002 
1003  svc_drop_frame.framedrop_mode = FULL_SUPERFRAME_DROP;
1004  for (sl = 0; sl < (unsigned int)svc_ctx.spatial_layers; ++sl)
1005  svc_drop_frame.framedrop_thresh[sl] = enc_cfg.rc_dropframe_thresh;
1006  svc_drop_frame.max_consec_drop = INT_MAX;
1007  vpx_codec_control(&encoder, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame);
1008 
1009  // Encode frames
1010  while (!end_of_stream) {
1011  vpx_codec_iter_t iter = NULL;
1012  const vpx_codec_cx_pkt_t *cx_pkt;
1013  // Example patterns for bypass/flexible mode:
1014  // example_pattern = 0: 2 temporal layers, and spatial_layers = 1,2,3. Exact
1015  // to fixed SVC patterns. example_pattern = 1: 2 spatial and 2 temporal
1016  // layers, with SL0 only has TL0, and SL1 has both TL0 and TL1. This example
1017  // uses the extended API.
1018  int example_pattern = 0;
1019  if (frame_cnt >= app_input.frames_to_code ||
1020  !read_frame(&app_input.input_ctx, &raw)) {
1021  // We need one extra vpx_svc_encode call at end of stream to flush
1022  // encoder and get remaining data
1023  end_of_stream = 1;
1024  }
1025 
1026  // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
1027  // and the buffer indices for each spatial layer of the current
1028  // (super)frame to be encoded. The spatial and temporal layer_id for the
1029  // current frame also needs to be set.
1030  // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
1031  // mode to "VP9E_LAYERING_MODE_BYPASS".
1032  if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
1033  layer_id.spatial_layer_id = 0;
1034  // Example for 2 temporal layers.
1035  if (frame_cnt % 2 == 0) {
1036  layer_id.temporal_layer_id = 0;
1037  for (i = 0; i < VPX_SS_MAX_LAYERS; i++)
1038  layer_id.temporal_layer_id_per_spatial[i] = 0;
1039  } else {
1040  layer_id.temporal_layer_id = 1;
1041  for (i = 0; i < VPX_SS_MAX_LAYERS; i++)
1042  layer_id.temporal_layer_id_per_spatial[i] = 1;
1043  }
1044  if (example_pattern == 1) {
1045  // example_pattern 1 is hard-coded for 2 spatial and 2 temporal layers.
1046  assert(svc_ctx.spatial_layers == 2);
1047  assert(svc_ctx.temporal_layers == 2);
1048  if (frame_cnt % 2 == 0) {
1049  // Spatial layer 0 and 1 are encoded.
1050  layer_id.temporal_layer_id_per_spatial[0] = 0;
1051  layer_id.temporal_layer_id_per_spatial[1] = 0;
1052  layer_id.spatial_layer_id = 0;
1053  } else {
1054  // Only spatial layer 1 is encoded here.
1055  layer_id.temporal_layer_id_per_spatial[1] = 1;
1056  layer_id.spatial_layer_id = 1;
1057  }
1058  }
1059  vpx_codec_control(&encoder, VP9E_SET_SVC_LAYER_ID, &layer_id);
1060  // TODO(jianj): Fix the parameter passing for "is_key_frame" in
1061  // set_frame_flags_bypass_model() for case of periodic key frames.
1062  if (example_pattern == 0) {
1063  set_frame_flags_bypass_mode_ex0(layer_id.temporal_layer_id,
1064  svc_ctx.spatial_layers, frame_cnt == 0,
1065  &ref_frame_config);
1066  } else if (example_pattern == 1) {
1067  set_frame_flags_bypass_mode_ex1(layer_id.temporal_layer_id,
1068  svc_ctx.spatial_layers, frame_cnt == 0,
1069  &ref_frame_config);
1070  }
1071  ref_frame_config.duration[0] = frame_duration * 1;
1072  ref_frame_config.duration[1] = frame_duration * 1;
1073 
1075  &ref_frame_config);
1076  // Keep track of input frames, to account for frame drops in rate control
1077  // stats/metrics.
1078  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
1079  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
1080  layer_id.temporal_layer_id];
1081  }
1082  } else {
1083  // For the fixed pattern SVC, temporal layer is given by superframe count.
1084  unsigned int tl = 0;
1085  if (enc_cfg.ts_number_layers == 2)
1086  tl = (frame_cnt % 2 != 0);
1087  else if (enc_cfg.ts_number_layers == 3) {
1088  if (frame_cnt % 2 != 0) tl = 2;
1089  if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0)) tl = 1;
1090  }
1091  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl)
1092  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + tl];
1093  }
1094 
1095  vpx_usec_timer_start(&timer);
1096  res = vpx_svc_encode(
1097  &svc_ctx, &encoder, (end_of_stream ? NULL : &raw), pts, frame_duration,
1098  svc_ctx.speed >= 5 ? VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
1099  vpx_usec_timer_mark(&timer);
1100  cx_time += vpx_usec_timer_elapsed(&timer);
1101 
1102  fflush(stdout);
1103  if (res != VPX_CODEC_OK) {
1104  die_codec(&encoder, "Failed to encode frame");
1105  }
1106 
1107  while ((cx_pkt = vpx_codec_get_cx_data(&encoder, &iter)) != NULL) {
1108  switch (cx_pkt->kind) {
1109  case VPX_CODEC_CX_FRAME_PKT: {
1110  SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
1111  if (cx_pkt->data.frame.sz > 0) {
1112  vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf,
1113  cx_pkt->data.frame.sz,
1114  cx_pkt->data.frame.pts);
1115 #if OUTPUT_RC_STATS
1116  if (svc_ctx.output_rc_stat) {
1117  svc_output_rc_stats(&encoder, &enc_cfg, &layer_id, cx_pkt, &rc,
1118  outfile, frame_cnt, framerate);
1119  }
1120 #endif
1121  }
1122  /*
1123  printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
1124  !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
1125  (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
1126  */
1127  if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
1128  si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
1129  ++frames_received;
1130 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
1131  if (vpx_codec_decode(&decoder, cx_pkt->data.frame.buf,
1132  (unsigned int)cx_pkt->data.frame.sz, NULL, 0))
1133  die_codec(&decoder, "Failed to decode frame.");
1134 #endif
1135  break;
1136  }
1137  case VPX_CODEC_STATS_PKT: {
1138  stats_write(&app_input.rc_stats, cx_pkt->data.twopass_stats.buf,
1139  cx_pkt->data.twopass_stats.sz);
1140  break;
1141  }
1142  default: { break; }
1143  }
1144 
1145 #if CONFIG_VP9_DECODER && !SIMULCAST_MODE
1146  vpx_codec_control(&encoder, VP9E_GET_SVC_LAYER_ID, &layer_id);
1147  // Don't look for mismatch on top spatial and top temporal layers as they
1148  // are non reference frames.
1149  if ((enc_cfg.ss_number_layers > 1 || enc_cfg.ts_number_layers > 1) &&
1150  !(layer_id.temporal_layer_id > 0 &&
1151  layer_id.temporal_layer_id == (int)enc_cfg.ts_number_layers - 1 &&
1152  cx_pkt->data.frame
1153  .spatial_layer_encoded[enc_cfg.ss_number_layers - 1])) {
1154  test_decode(&encoder, &decoder, frame_cnt, &mismatch_seen);
1155  }
1156 #endif
1157  }
1158 
1159  if (!end_of_stream) {
1160  ++frame_cnt;
1161  pts += frame_duration;
1162  }
1163  }
1164 
1165  printf("Processed %d frames\n", frame_cnt);
1166 
1167  close_input_file(&app_input.input_ctx);
1168 
1169 #if OUTPUT_RC_STATS
1170  if (svc_ctx.output_rc_stat) {
1171  printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
1172  printf("\n");
1173  }
1174 #endif
1175  if (vpx_codec_destroy(&encoder))
1176  die_codec(&encoder, "Failed to destroy codec");
1177  if (writer) {
1178  vpx_video_writer_close(writer);
1179  }
1180 #if OUTPUT_RC_STATS
1181  if (svc_ctx.output_rc_stat) {
1182  for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
1183  vpx_video_writer_close(outfile[sl]);
1184  }
1185  }
1186 #endif
1187 #if CONFIG_INTERNAL_STATS
1188  if (mismatch_seen) {
1189  fprintf(f, "First mismatch occurred in frame %d\n", mismatch_seen);
1190  } else {
1191  fprintf(f, "No mismatch detected in recon buffers\n");
1192  }
1193  fclose(f);
1194 #endif
1195  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
1196  frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
1197  1000000 * (double)frame_cnt / (double)cx_time);
1198  if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1199  vpx_img_free(&raw);
1200  }
1201  // display average size, psnr
1202  vpx_svc_dump_statistics(&svc_ctx);
1203  vpx_svc_release(&svc_ctx);
1204  return EXIT_SUCCESS;
1205 }
vpx_fixed_buf_t twopass_stats
Definition: vpx_encoder.h:184
unsigned int ts_number_layers
Number of temporal coding layers.
Definition: vpx_encoder.h:646
Codec control function to disable increase Q on overshoot in CBR.
Definition: vp8cx.h:695
Codec control function to set encoder internal speed settings.
Definition: vp8cx.h:156
#define VPX_MAX_LAYERS
Definition: vpx_encoder.h:44
int reference_alt_ref[5]
Definition: vp8cx.h:864
Image Descriptor.
Definition: vpx_image.h:72
Describes the encoder algorithm interface to applications.
const char * vpx_codec_iface_name(vpx_codec_iface_t *iface)
Return the name for a given interface.
Codec control function to constrain the inter-layer prediction (prediction of lower spatial resolutio...
Definition: vp8cx.h:620
const char * vpx_codec_err_to_string(vpx_codec_err_t err)
Convert error number to printable string.
int lst_fb_idx[5]
Definition: vp8cx.h:854
Codec control function to set content type.
Definition: vp8cx.h:464
struct vpx_rational g_timebase
Stream timebase units.
Definition: vpx_encoder.h:345
Codec control function to set noise sensitivity.
Definition: vp8cx.h:422
unsigned int layer_target_bitrate[12]
Target bitrate for each spatial/temporal layer.
Definition: vpx_encoder.h:686
SVC_LAYER_DROP_MODE framedrop_mode
Definition: vp8cx.h:892
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition: vpx_encoder.h:331
int den
Definition: vpx_encoder.h:222
Definition: vpx_encoder.h:150
int framedrop_thresh[5]
Definition: vp8cx.h:890
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: vpx_encoder.h:616
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: vpx_encoder.h:374
Encoder configuration structure.
Definition: vpx_encoder.h:270
int reference_golden[5]
Definition: vp8cx.h:863
The coded data for this stream is corrupt or incomplete.
Definition: vpx_codec.h:133
Codec control function to set row level multi-threading.
Definition: vp8cx.h:571
Codec control function to disable loopfilter.
Definition: vp8cx.h:704
Codec control function to set Max data rate for Intra frames.
Definition: vp8cx.h:258
Encoder output packet.
Definition: vpx_encoder.h:161
void * buf
Definition: vpx_encoder.h:99
unsigned int ts_rate_decimator[5]
Frame rate decimation factor for each temporal layer.
Definition: vpx_encoder.h:660
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: vpx_encoder.h:607
vp9 svc frame dropping parameters.
Definition: vp8cx.h:889
unsigned int g_profile
Bitstream profile to use.
Definition: vpx_encoder.h:297
Codec control function to set number of tile columns.
Definition: vp8cx.h:352
#define VPX_IMG_FMT_HIGHBITDEPTH
Definition: vpx_image.h:35
struct vpx_codec_cx_pkt::@1::@2 frame
#define VPX_SS_MAX_LAYERS
Definition: vpx_encoder.h:47
vpx_image_t * vpx_img_alloc(vpx_image_t *img, vpx_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
Definition: vpx_image.h:42
unsigned int d_w
Definition: vpx_image.h:83
#define vpx_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for vpx_codec_dec_init_ver()
Definition: vpx_decoder.h:143
unsigned int g_w
Width of the frame.
Definition: vpx_encoder.h:306
int reference_last[5]
Definition: vp8cx.h:862
int update_buffer_slot[5]
Definition: vp8cx.h:857
Codec control function to set adaptive quantization mode.
Definition: vp8cx.h:399
vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline)
Decode data.
Codec control function to get svc layer ID.
Definition: vp8cx.h:472
unsigned int g_h
Height of the frame.
Definition: vpx_encoder.h:315
enum vpx_codec_cx_pkt_kind kind
Definition: vpx_encoder.h:162
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition: vpx_encoder.h:393
vp9 svc layer parameters
Definition: vp8cx.h:838
Operation completed without error.
Definition: vpx_codec.h:95
void vpx_img_free(vpx_image_t *img)
Close an image descriptor.
vpx_img_fmt_t fmt
Definition: vpx_image.h:73
unsigned int rc_target_bitrate
Target data rate.
Definition: vpx_encoder.h:462
#define VPX_DL_REALTIME
deadline parameter analogous to VPx REALTIME mode.
Definition: vpx_encoder.h:833
int num
Definition: vpx_encoder.h:221
Definition: vpx_codec.h:223
vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg, unsigned int usage)
Get a default configuration.
Codec control function to set the frame flags and buffer indices for spatial layers. The frame flags and buffer indices are set using the struct vpx_svc_ref_frame_config defined below.
Definition: vp8cx.h:546
enum vpx_enc_pass g_pass
Multi-pass Encoding Mode.
Definition: vpx_encoder.h:360
Codec control function to set mode and thresholds for frame dropping in SVC. Drop frame thresholds ar...
Definition: vp8cx.h:629
#define VPX_DL_GOOD_QUALITY
deadline parameter analogous to VPx GOOD QUALITY mode.
Definition: vpx_encoder.h:835
unsigned int ss_number_layers
Number of spatial coding layers.
Definition: vpx_encoder.h:626
vpx_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition: vpx_encoder.h:323
Provides definitions for using VP8 or VP9 encoder algorithm within the vpx Codec Interface.
Bypass mode. Used when application needs to control temporal layering. This will only work when the n...
Definition: vp8cx.h:744
Definition: vp8cx.h:877
vpx_codec_err_t
Algorithm return codes.
Definition: vpx_codec.h:93
const vpx_codec_cx_pkt_t * vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, vpx_codec_iter_t *iter)
Encoded data iterator.
union vpx_codec_cx_pkt::@1 data
int temporal_layering_mode
Temporal layering mode indicating which temporal layering scheme to use.
Definition: vpx_encoder.h:695
VP9 specific reference frame data struct.
Definition: vp8.h:110
int temporal_layer_id
Definition: vp8cx.h:841
int max_consec_drop
Definition: vp8cx.h:893
Definition: vpx_encoder.h:236
int idx
Definition: vp8.h:111
#define vpx_codec_control(ctx, id, data)
vpx_codec_control wrapper macro
Definition: vpx_codec.h:407
vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx)
Destroy a codec instance.
unsigned int d_h
Definition: vpx_image.h:84
size_t sz
Definition: vpx_encoder.h:100
Definition: vpx_codec.h:221
vp9 svc frame flag parameters.
Definition: vp8cx.h:853
vpx_codec_err_t err
Definition: vpx_codec.h:203
Definition: vp8.h:55
Codec control function to set the threshold for MBs treated static.
Definition: vp8cx.h:189
int64_t duration[5]
Definition: vp8cx.h:865
#define VPX_FRAME_IS_KEY
Definition: vpx_encoder.h:118
Definition: vpx_codec.h:222
int alt_fb_idx[5]
Definition: vp8cx.h:856
const void * vpx_codec_iter_t
Iterator.
Definition: vpx_codec.h:190
Definition: vpx_encoder.h:149
unsigned int rc_2pass_vbr_maxsection_pct
Two-pass mode per-GOP maximum bitrate.
Definition: vpx_encoder.h:579
vpx_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: vpx_encoder.h:353
unsigned int rc_2pass_vbr_minsection_pct
Two-pass mode per-GOP minimum bitrate.
Definition: vpx_encoder.h:572
int gld_fb_idx[5]
Definition: vp8cx.h:855
Codec control function to set svc layer for spatial and temporal.
Definition: vp8cx.h:454
enum vpx_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: vpx_encoder.h:442
Definition: vpx_encoder.h:227
Codec context structure.
Definition: vpx_codec.h:200