2 * Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
3 * Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * Calculate the VMAF between two input videos.
27 #include "config_components.h"
31 #include "libavutil/avstring.h"
32 #include "libavutil/opt.h"
33 #include "libavutil/pixdesc.h"
35 #include "drawutils.h"
37 #include "framesync.h"
41 #if CONFIG_LIBVMAF_CUDA_FILTER
42 #include <libvmaf_cuda.h>
44 #include "libavutil/hwcontext.h"
45 #include "libavutil/hwcontext_cuda_internal.h"
/* Private state of the libvmaf filters; only part of the member list is visible in this excerpt. */
48 typedef struct LIBVMAFContext {
62 int enable_conf_interval;
70 #if CONFIG_LIBVMAF_CUDA_FILTER
/* CUDA state handle: created by vmaf_cuda_state_init() and imported into the VMAF context in config_props_cuda(). */
71 VmafCudaState *cu_state;
/* Byte offset of an option's backing member inside LIBVMAFContext. */
75 #define OFFSET(x) offsetof(LIBVMAFContext, x)
/* Flags shared by every option below: filtering parameter, video parameter. */
76 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
/* User-visible options; each entry stores its value in the LIBVMAFContext member named by OFFSET(). */
78 static const AVOption libvmaf_options[] = {
79 {"log_path", "Set the file path to be used to write log.", OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
80 {"log_fmt", "Set the format of the log (csv, json, xml, or sub).", OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str="xml"}, 0, 1, FLAGS},
81 {"pool", "Set the pool method to be used for computing vmaf.", OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
82 {"n_threads", "Set number of threads to be used when computing vmaf.", OFFSET(n_threads), AV_OPT_TYPE_INT, {.i64=0}, 0, UINT_MAX, FLAGS},
83 {"n_subsample", "Set interval for frame subsampling used when computing vmaf.", OFFSET(n_subsample), AV_OPT_TYPE_INT, {.i64=1}, 1, UINT_MAX, FLAGS},
/* "model" and "feature" are '|'-delimited lists that parse_models() and parse_features() hand to delimited_dict_parse(). */
84 {"model", "Set the model to be used for computing vmaf.", OFFSET(model_cfg), AV_OPT_TYPE_STRING, {.str="version=vmaf_v0.6.1"}, 0, 1, FLAGS},
85 {"feature", "Set the feature to be used for computing vmaf.", OFFSET(feature_cfg), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
/* NOTE(review): presumably this macro defines libvmaf_class and libvmaf_framesync_preinit, which the filter definitions reference -- confirm against framesync.h. */
89 FRAMESYNC_DEFINE_CLASS(libvmaf, LIBVMAFContext, fs);
91 static enum VmafPixelFormat pix_fmt_map(enum AVPixelFormat av_pix_fmt)
94 case AV_PIX_FMT_YUV420P:
95 case AV_PIX_FMT_YUV420P10LE:
96 case AV_PIX_FMT_YUV420P12LE:
97 case AV_PIX_FMT_YUV420P16LE:
98 return VMAF_PIX_FMT_YUV420P;
99 case AV_PIX_FMT_YUV422P:
100 case AV_PIX_FMT_YUV422P10LE:
101 case AV_PIX_FMT_YUV422P12LE:
102 case AV_PIX_FMT_YUV422P16LE:
103 return VMAF_PIX_FMT_YUV422P;
104 case AV_PIX_FMT_YUV444P:
105 case AV_PIX_FMT_YUV444P10LE:
106 case AV_PIX_FMT_YUV444P12LE:
107 case AV_PIX_FMT_YUV444P16LE:
108 return VMAF_PIX_FMT_YUV444P;
110 return VMAF_PIX_FMT_UNKNOWN;
114 static int copy_picture_data(AVFrame *src, VmafPicture *dst, unsigned bpc)
116 const int bytes_per_value = bpc > 8 ? 2 : 1;
117 int err = vmaf_picture_alloc(dst, pix_fmt_map(src->format), bpc,
118 src->width, src->height);
120 return AVERROR(ENOMEM);
122 for (unsigned i = 0; i < 3; i++) {
123 uint8_t *src_data = src->data[i];
124 uint8_t *dst_data = dst->data[i];
125 for (unsigned j = 0; j < dst->h[i]; j++) {
126 memcpy(dst_data, src_data, bytes_per_value * dst->w[i]);
127 src_data += src->linesize[i];
128 dst_data += dst->stride[i];
135 static int do_vmaf(FFFrameSync *fs)
137 AVFilterContext *ctx = fs->parent;
138 LIBVMAFContext *s = ctx->priv;
139 VmafPicture pic_ref, pic_dist;
143 int ret = ff_framesync_dualinput_get(fs, &dist, &ref);
146 if (ctx->is_disabled || !ref)
147 return ff_filter_frame(ctx->outputs[0], dist);
149 if (dist->color_range != ref->color_range) {
150 av_log(ctx, AV_LOG_WARNING, "distorted and reference "
151 "frames use different color ranges (%s != %s)\n",
152 av_color_range_name(dist->color_range),
153 av_color_range_name(ref->color_range));
156 err = copy_picture_data(ref, &pic_ref, s->bpc);
158 av_log(s, AV_LOG_ERROR, "problem during vmaf_picture_alloc.\n");
159 return AVERROR(ENOMEM);
162 err = copy_picture_data(dist, &pic_dist, s->bpc);
164 av_log(s, AV_LOG_ERROR, "problem during vmaf_picture_alloc.\n");
165 vmaf_picture_unref(&pic_ref);
166 return AVERROR(ENOMEM);
169 err = vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cnt++);
171 av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n");
172 return AVERROR(EINVAL);
175 return ff_filter_frame(ctx->outputs[0], dist);
/*
 * Split str on '|' and parse every segment into its own AVDictionary, using
 * '=' between key and value and ':' between pairs.
 * *cnt is advanced once per segment handed to av_dict_parse_string().
 * NOTE(review): the return paths are not visible in this excerpt; callers
 * treat the returned array as one dictionary per segment -- confirm.
 */
179 static AVDictionary **delimited_dict_parse(char *str, unsigned *cnt)
181 AVDictionary **dict = NULL;
182 char *str_copy = NULL;
183 char *saveptr = NULL;
/* NOTE(review): presumably counts '|' delimiters to size the array (cnt2) -- the loop body is not visible. */
191 for (char *p = str; *p; p++) {
196 dict = av_calloc(cnt2, sizeof(*dict));
/* Tokenise a private copy of the input rather than str itself. */
200 str_copy = av_strdup(str);
205 for (unsigned i = 0; i < cnt2; i++) {
/* First call passes the copy, later calls pass NULL to continue from saveptr. */
206 char *s = av_strtok(i == 0 ? str_copy : NULL, "|", &saveptr);
209 err = av_dict_parse_string(&dict[(*cnt)++], s, "=", ":", 0);
/* NOTE(review): looks like the failure cleanup that frees every dictionary created so far -- confirm. */
219 for (unsigned i = 0; i < *cnt; i++) {
221 av_dict_free(&dict[i]);
/*
 * Register the features requested through the "feature" option with the
 * VMAF context. The option string is split by delimited_dict_parse(), giving
 * one dictionary per feature.
 * The visible parse-failure path logs and returns AVERROR(EINVAL).
 */
231 static int parse_features(AVFilterContext *ctx)
233 LIBVMAFContext *s = ctx->priv;
234 AVDictionary **dict = NULL;
241 dict = delimited_dict_parse(s->feature_cfg, &dict_cnt);
243 av_log(ctx, AV_LOG_ERROR,
244 "could not parse feature config: %s\n", s->feature_cfg);
245 return AVERROR(EINVAL);
248 for (unsigned i = 0; i < dict_cnt; i++) {
249 char *feature_name = NULL;
250 VmafFeatureDictionary *feature_opts_dict = NULL;
251 const AVDictionaryEntry *e = NULL;
/* Walk every key/value pair of this feature's dictionary. */
253 while (e = av_dict_iterate(dict[i], e)) {
/* Substring match via av_stristr: any key containing "name" supplies the feature name. */
254 if (av_stristr(e->key, "name")) {
255 feature_name = e->value;
/* NOTE(review): presumably reached only for the remaining keys, which become feature options -- the branch structure is not visible. */
259 err = vmaf_feature_dictionary_set(&feature_opts_dict, e->key,
262 av_log(ctx, AV_LOG_ERROR,
263 "could not set feature option: %s.%s=%s\n",
264 feature_name, e->key, e->value);
/* Enable the named feature on the VMAF context with the collected options. */
269 err = vmaf_use_feature(s->vmaf, feature_name, feature_opts_dict);
271 av_log(ctx, AV_LOG_ERROR,
272 "problem during vmaf_use_feature: %s\n", feature_name);
/* Release every per-feature dictionary returned by delimited_dict_parse(). */
278 for (unsigned i = 0; i < dict_cnt; i++) {
280 av_dict_free(&dict[i]);
/*
 * Load the models described by the "model" option and register the features
 * they require with the VMAF context.
 * Each '|'-delimited segment of the option describes one model; loaded models
 * are stored in s->model[] and their number in s->model_cnt.
 */
286 static int parse_models(AVFilterContext *ctx)
288 LIBVMAFContext *s = ctx->priv;
/* No model configuration: nothing to load. */
293 if (!s->model_cfg) return 0;
296 dict = delimited_dict_parse(s->model_cfg, &dict_cnt);
298 av_log(ctx, AV_LOG_ERROR,
299 "could not parse model config: %s\n", s->model_cfg);
300 return AVERROR(EINVAL);
/* One model slot per parsed dictionary. */
303 s->model_cnt = dict_cnt;
304 s->model = av_calloc(s->model_cnt, sizeof(*s->model));
306 return AVERROR(ENOMEM);
308 for (unsigned i = 0; i < dict_cnt; i++) {
309 VmafModelConfig model_cfg = { 0 };
310 const AVDictionaryEntry *e = NULL;
311 char *version = NULL;
/* First pass over the entries: collect flags, name, version and path. Keys are matched as substrings via av_stristr. */
314 while (e = av_dict_iterate(dict[i], e)) {
/* A flag is set only when its value contains "true". */
315 if (av_stristr(e->key, "disable_clip")) {
316 model_cfg.flags |= av_stristr(e->value, "true") ?
317 VMAF_MODEL_FLAG_DISABLE_CLIP : 0;
321 if (av_stristr(e->key, "enable_transform")) {
322 model_cfg.flags |= av_stristr(e->value, "true") ?
323 VMAF_MODEL_FLAG_ENABLE_TRANSFORM : 0;
327 if (av_stristr(e->key, "name")) {
328 model_cfg.name = e->value;
332 if (av_stristr(e->key, "version")) {
337 if (av_stristr(e->key, "path")) {
/* Load the model identified by its version string. */
344 err = vmaf_model_load(&s->model[i], &model_cfg, version);
346 av_log(ctx, AV_LOG_ERROR,
347 "could not load libvmaf model with version: %s\n",
/* Load from a file only when a path was given and no model is loaded yet. */
353 if (path && !s->model[i]) {
354 err = vmaf_model_load_from_path(&s->model[i], &model_cfg, path);
356 av_log(ctx, AV_LOG_ERROR,
357 "could not load libvmaf model with path: %s\n",
364 av_log(ctx, AV_LOG_ERROR,
365 "could not load libvmaf model with config: %s\n",
/* Second pass over the same dictionary: entries keyed "<feature>.<option>" overload model features. */
370 while (e = av_dict_iterate(dict[i], e)) {
371 VmafFeatureDictionary *feature_opts_dict = NULL;
372 char *feature_opt = NULL;
/* NOTE(review): av_strtok tokenises in place, so this writes into the dictionary entry's key -- confirm that is intended. */
374 char *feature_name = av_strtok(e->key, ".", &feature_opt);
378 err = vmaf_feature_dictionary_set(&feature_opts_dict,
379 feature_opt, e->value);
381 av_log(ctx, AV_LOG_ERROR,
382 "could not set feature option: %s.%s=%s\n",
383 feature_name, feature_opt, e->value);
384 err = AVERROR(EINVAL);
388 err = vmaf_model_feature_overload(s->model[i], feature_name,
391 av_log(ctx, AV_LOG_ERROR,
392 "could not overload feature: %s\n", feature_name);
393 err = AVERROR(EINVAL);
/* Register the features required by every loaded model with the VMAF context. */
399 for (unsigned i = 0; i < s->model_cnt; i++) {
400 err = vmaf_use_features_from_model(s->vmaf, s->model[i]);
402 av_log(ctx, AV_LOG_ERROR,
403 "problem during vmaf_use_features_from_model\n");
404 err = AVERROR(EINVAL);
/* Release every per-model dictionary returned by delimited_dict_parse(). */
410 for (unsigned i = 0; i < dict_cnt; i++) {
412 av_dict_free(&dict[i]);
/*
 * Translate an FFmpeg log level into a libvmaf log level.
 * NOTE(review): the labels selecting each return are not visible in this
 * excerpt; the last return (VMAF_LOG_LEVEL_INFO) is presumably the fallback
 * for levels without a direct mapping -- confirm.
 */
418 static enum VmafLogLevel log_level_map(int log_level)
422 return VMAF_LOG_LEVEL_NONE;
424 return VMAF_LOG_LEVEL_ERROR;
426 return VMAF_LOG_LEVEL_WARNING;
428 return VMAF_LOG_LEVEL_INFO;
430 return VMAF_LOG_LEVEL_DEBUG;
432 return VMAF_LOG_LEVEL_INFO;
/*
 * Filter init: create the VMAF context, load models and extra features, and
 * install do_vmaf() as the framesync event callback.
 * The visible failure path after vmaf_init() returns AVERROR(EINVAL).
 */
436 static av_cold int init(AVFilterContext *ctx)
438 LIBVMAFContext *s = ctx->priv;
/* libvmaf configuration taken from the current FFmpeg log level and the n_subsample / n_threads options. */
441 VmafConfiguration cfg = {
442 .log_level = log_level_map(av_log_get_level()),
443 .n_subsample = s->n_subsample,
444 .n_threads = s->n_threads,
447 err = vmaf_init(&s->vmaf, cfg);
449 return AVERROR(EINVAL);
/* Models are parsed before the extra features. */
451 err = parse_models(ctx);
455 err = parse_features(ctx);
459 s->fs.on_event = do_vmaf;
/* Pixel formats advertised by ff_vf_libvmaf: planar YUV 4:4:4 / 4:2:2 / 4:2:0 at 8, 10, 12 and 16 bits (little-endian above 8 bits). */
463 static const enum AVPixelFormat pix_fmts[] = {
464 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
465 AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE,
466 AV_PIX_FMT_YUV444P12LE, AV_PIX_FMT_YUV422P12LE, AV_PIX_FMT_YUV420P12LE,
467 AV_PIX_FMT_YUV444P16LE, AV_PIX_FMT_YUV422P16LE, AV_PIX_FMT_YUV420P16LE,
/*
 * config_props callback of the second input pad: verify that both inputs
 * agree on width, height and pixel format, then record the bit depth.
 * Each mismatch is logged and ORs AVERROR(EINVAL) into err, so all
 * mismatches are reported rather than only the first.
 */
471 static int config_input_ref(AVFilterLink *inlink)
473 AVFilterContext *ctx = inlink->dst;
474 LIBVMAFContext *s = ctx->priv;
475 const AVPixFmtDescriptor *desc;
478 if (ctx->inputs[0]->w != ctx->inputs[1]->w) {
479 av_log(ctx, AV_LOG_ERROR, "input width must match.\n");
480 err |= AVERROR(EINVAL);
483 if (ctx->inputs[0]->h != ctx->inputs[1]->h) {
484 av_log(ctx, AV_LOG_ERROR, "input height must match.\n");
485 err |= AVERROR(EINVAL);
488 if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
489 av_log(ctx, AV_LOG_ERROR, "input pix_fmt must match.\n");
490 err |= AVERROR(EINVAL);
/* Bit depth of the first component; later passed to copy_picture_data() as bpc. */
496 desc = av_pix_fmt_desc_get(inlink->format);
497 s->bpc = desc->comp[0].depth;
/*
 * config_props callback of the output pad: set up the dual-input frame
 * synchroniser and make the output link mirror the first (main) input.
 */
502 static int config_output(AVFilterLink *outlink)
504 AVFilterContext *ctx = outlink->src;
505 LIBVMAFContext *s = ctx->priv;
506 AVFilterLink *mainlink = ctx->inputs[0];
509 ret = ff_framesync_init_dualinput(&s->fs, ctx);
/* Output geometry and timing are copied from the main input. */
512 outlink->w = mainlink->w;
513 outlink->h = mainlink->h;
514 outlink->time_base = mainlink->time_base;
515 outlink->sample_aspect_ratio = mainlink->sample_aspect_ratio;
516 outlink->frame_rate = mainlink->frame_rate;
517 if ((ret = ff_framesync_configure(&s->fs)) < 0)
523 static int activate(AVFilterContext *ctx)
525 LIBVMAFContext *s = ctx->priv;
526 return ff_framesync_activate(&s->fs);
529 static enum VmafOutputFormat log_fmt_map(const char *log_fmt)
532 if (av_stristr(log_fmt, "xml"))
533 return VMAF_OUTPUT_FORMAT_XML;
534 if (av_stristr(log_fmt, "json"))
535 return VMAF_OUTPUT_FORMAT_JSON;
536 if (av_stristr(log_fmt, "csv"))
537 return VMAF_OUTPUT_FORMAT_CSV;
538 if (av_stristr(log_fmt, "sub"))
539 return VMAF_OUTPUT_FORMAT_SUB;
542 return VMAF_OUTPUT_FORMAT_XML;
545 static enum VmafPoolingMethod pool_method_map(const char *pool_method)
548 if (av_stristr(pool_method, "min"))
549 return VMAF_POOL_METHOD_MIN;
550 if (av_stristr(pool_method, "mean"))
551 return VMAF_POOL_METHOD_MEAN;
552 if (av_stristr(pool_method, "harmonic_mean"))
553 return VMAF_POOL_METHOD_HARMONIC_MEAN;
556 return VMAF_POOL_METHOD_MEAN;
/*
 * Filter teardown: stop the frame synchroniser, flush libvmaf, log the pooled
 * score of every model, optionally write the report file, and destroy the
 * models.
 */
559 static av_cold void uninit(AVFilterContext *ctx)
561 LIBVMAFContext *s = ctx->priv;
564 ff_framesync_uninit(&s->fs);
/* Reading NULL pictures is the flush request (see the error message below). */
569 err = vmaf_read_pictures(s->vmaf, NULL, NULL, 0);
571 av_log(ctx, AV_LOG_ERROR,
572 "problem flushing libvmaf context.\n");
575 for (unsigned i = 0; i < s->model_cnt; i++) {
/* Pool over frame indices 0 .. frame_cnt - 1 with the method chosen by the "pool" option. */
577 err = vmaf_score_pooled(s->vmaf, s->model[i], pool_method_map(s->pool),
578 &vmaf_score, 0, s->frame_cnt - 1);
580 av_log(ctx, AV_LOG_ERROR,
581 "problem getting pooled vmaf score.\n");
584 av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n", vmaf_score);
/* The report is written only when a log path was given and err is clear at this point. */
588 if (s->log_path && !err)
589 vmaf_write_output(s->vmaf, s->log_path, log_fmt_map(s->log_fmt));
594 for (unsigned i = 0; i < s->model_cnt; i++) {
596 vmaf_model_destroy(s->model[i]);
/* Input pads, shared by both filter variants: two video pads, the second of which validates the pair via config_input_ref(). */
605 static const AVFilterPad libvmaf_inputs[] = {
608 .type = AVMEDIA_TYPE_VIDEO,
611 .type = AVMEDIA_TYPE_VIDEO,
612 .config_props = config_input_ref,
/* Output pad of ff_vf_libvmaf: a video pad configured by config_output(). */
616 static const AVFilterPad libvmaf_outputs[] = {
619 .type = AVMEDIA_TYPE_VIDEO,
620 .config_props = config_output,
/* Filter definition for the variant that accepts the planar YUV formats listed in pix_fmts. */
624 const AVFilter ff_vf_libvmaf = {
626 .description = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
627 .preinit = libvmaf_framesync_preinit,
630 .activate = activate,
631 .priv_size = sizeof(LIBVMAFContext),
632 .priv_class = &libvmaf_class,
633 FILTER_INPUTS(libvmaf_inputs),
634 FILTER_OUTPUTS(libvmaf_outputs),
635 FILTER_PIXFMTS_ARRAY(pix_fmts),
638 #if CONFIG_LIBVMAF_CUDA_FILTER
/* Software pixel formats accepted for CUDA frames, checked by format_is_supported(); only part of the list is visible in this excerpt. */
639 static const enum AVPixelFormat supported_formats[] = {
641 AV_PIX_FMT_YUV444P16,
644 static int format_is_supported(enum AVPixelFormat fmt)
648 for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
649 if (supported_formats[i] == fmt)
/*
 * config_props callback of the CUDA output pad: create the VMAF context,
 * attach CUDA state to it, preallocate pictures, load models and features,
 * then finish with the common config_output().
 * The visible failure paths return AVERROR(EINVAL).
 */
654 static int config_props_cuda(AVFilterLink *outlink)
657 AVFilterContext *ctx = outlink->src;
658 LIBVMAFContext *s = ctx->priv;
659 AVFilterLink *inlink = ctx->inputs[0];
/* The CUDA context and software pixel format come from the first input's hardware frames context. */
660 AVHWFramesContext *frames_ctx = (AVHWFramesContext*) inlink->hw_frames_ctx->data;
661 AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
662 CUcontext cu_ctx = device_hwctx->cuda_ctx;
663 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frames_ctx->sw_format);
665 VmafConfiguration cfg = {
666 .log_level = log_level_map(av_log_get_level()),
667 .n_subsample = s->n_subsample,
668 .n_threads = s->n_threads,
/* Description of the pictures libvmaf should preallocate. */
671 VmafCudaPictureConfiguration cuda_pic_cfg = {
673 .bpc = desc->comp[0].depth,
676 .pix_fmt = pix_fmt_map(frames_ctx->sw_format),
678 .pic_prealloc_method = VMAF_CUDA_PICTURE_PREALLOCATION_METHOD_DEVICE,
681 VmafCudaConfiguration cuda_cfg = {
/* Reject software formats that are not listed in supported_formats. */
685 if (!format_is_supported(frames_ctx->sw_format)) {
686 av_log(s, AV_LOG_ERROR,
687 "Unsupported input format: %s\n", desc->name);
688 return AVERROR(EINVAL);
691 err = vmaf_init(&s->vmaf, cfg);
693 return AVERROR(EINVAL);
695 err = vmaf_cuda_state_init(&s->cu_state, cuda_cfg);
697 return AVERROR(EINVAL);
699 err = vmaf_cuda_import_state(s->vmaf, s->cu_state);
701 return AVERROR(EINVAL);
703 err = vmaf_cuda_preallocate_pictures(s->vmaf, cuda_pic_cfg);
707 err = parse_models(ctx);
711 err = parse_features(ctx);
715 return config_output(outlink);
/*
 * Fetch a preallocated VMAF picture and fill it from a CUDA frame using 2D
 * copies with both source and destination marked as device memory.
 * Returns AVERROR(ENOMEM) when no preallocated picture can be fetched and
 * AVERROR_EXTERNAL when a CUDA driver call fails.
 */
718 static int copy_picture_data_cuda(VmafContext* vmaf,
719 AVCUDADeviceContext* device_hwctx,
720 AVFrame* src, VmafPicture* dst,
721 enum AVPixelFormat pix_fmt)
723 const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(pix_fmt);
724 CudaFunctions *cu = device_hwctx->internal->cuda_dl;
727 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
728 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
731 int err = vmaf_cuda_fetch_preallocated_picture(vmaf, dst);
733 return AVERROR(ENOMEM);
/* Make the device's CUDA context current for the copies below. */
735 err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
737 return AVERROR_EXTERNAL;
739 for (unsigned i = 0; i < pix_desc->nb_components; i++) {
740 m.srcDevice = (CUdeviceptr) src->data[i];
741 m.srcPitch = src->linesize[i];
742 m.dstDevice = (CUdeviceptr) dst->data[i];
743 m.dstPitch = dst->stride[i];
/* Row width in bytes: sample count times bytes per sample (bpc rounded up to whole bytes). */
744 m.WidthInBytes = dst->w[i] * ((dst->bpc + 7) / 8);
745 m.Height = dst->h[i];
747 err = cu->cuMemcpy2D(&m);
/* NOTE(review): this return appears to leave without the cuCtxPopCurrent() below, so the pushed context would stay current -- confirm. */
749 return AVERROR_EXTERNAL;
753 err = cu->cuCtxPopCurrent(NULL);
755 return AVERROR_EXTERNAL;
760 static int do_vmaf_cuda(FFFrameSync* fs)
762 AVFilterContext* ctx = fs->parent;
763 LIBVMAFContext* s = ctx->priv;
764 AVFilterLink *inlink = ctx->inputs[0];
765 AVHWFramesContext *frames_ctx = (AVHWFramesContext*) inlink->hw_frames_ctx->data;
766 AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
767 VmafPicture pic_ref, pic_dist;
772 err = ff_framesync_dualinput_get(fs, &dist, &ref);
775 if (ctx->is_disabled || !ref)
776 return ff_filter_frame(ctx->outputs[0], dist);
778 err = copy_picture_data_cuda(s->vmaf, device_hwctx, ref, &pic_ref,
779 frames_ctx->sw_format);
781 av_log(s, AV_LOG_ERROR, "problem during copy_picture_data_cuda.\n");
782 return AVERROR(ENOMEM);
785 err = copy_picture_data_cuda(s->vmaf, device_hwctx, dist, &pic_dist,
786 frames_ctx->sw_format);
788 av_log(s, AV_LOG_ERROR, "problem during copy_picture_data_cuda.\n");
789 return AVERROR(ENOMEM);
792 err = vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cnt++);
794 av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n");
795 return AVERROR(EINVAL);
798 return ff_filter_frame(ctx->outputs[0], dist);
801 static av_cold int init_cuda(AVFilterContext *ctx)
803 LIBVMAFContext *s = ctx->priv;
804 s->fs.on_event = do_vmaf_cuda;
/* Output pad of ff_vf_libvmaf_cuda: a video pad configured by config_props_cuda(). */
808 static const AVFilterPad libvmaf_outputs_cuda[] = {
811 .type = AVMEDIA_TYPE_VIDEO,
812 .config_props = config_props_cuda,
/* Filter definition for the CUDA variant: same input pads and option class as ff_vf_libvmaf, but accepts only AV_PIX_FMT_CUDA frames and is flagged as hardware-frame aware. */
816 const AVFilter ff_vf_libvmaf_cuda = {
817 .name = "libvmaf_cuda",
818 .description = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
819 .preinit = libvmaf_framesync_preinit,
822 .activate = activate,
823 .priv_size = sizeof(LIBVMAFContext),
824 .priv_class = &libvmaf_class,
825 FILTER_INPUTS(libvmaf_inputs),
826 FILTER_OUTPUTS(libvmaf_outputs_cuda),
827 FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA),
828 .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,