2 * Copyright (c) 2022 Paul B Mahol
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "libavutil/tx.h"
25 #include "libavutil/avassert.h"
26 #include "libavutil/avstring.h"
27 #include "libavutil/channel_layout.h"
28 #include "libavutil/float_dsp.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/opt.h"
31 #include "libavutil/parseutils.h"
75 typedef struct ShowCWTContext {
80 AVRational auto_frame_rate;
81 AVRational frame_rate;
82 AVTXContext **fft, **ifft;
83 av_tx_fn tx_fn, itx_fn;
84 int fft_size, ifft_size;
89 float *frequency_band;
90 AVComplexFloat **kernel;
92 int *kernel_start, *kernel_stop;
106 int nb_consumed_samples;
112 int hop_size, ihop_size;
113 int hop_index, ihop_index;
114 int input_padding_size, output_padding_size;
115 int input_sample_count, output_sample_count;
116 int frequency_band_count;
117 float logarithmic_basis;
120 float minimum_frequency, maximum_frequency;
121 float minimum_intensity, maximum_intensity;
127 AVFloatDSPContext *fdsp;
130 #define OFFSET(x) offsetof(ShowCWTContext, x)
131 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
133 static const AVOption showcwt_options[] = {
134 { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
135 { "s", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
136 { "rate", "set video rate", OFFSET(rate_str), AV_OPT_TYPE_STRING, {.str = "25"}, 0, 0, FLAGS },
137 { "r", "set video rate", OFFSET(rate_str), AV_OPT_TYPE_STRING, {.str = "25"}, 0, 0, FLAGS },
138 { "scale", "set frequency scale", OFFSET(frequency_scale), AV_OPT_TYPE_INT, {.i64=0}, 0, NB_FSCALE-1, FLAGS, "scale" },
139 { "linear", "linear", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_LINEAR}, 0, 0, FLAGS, "scale" },
140 { "log", "logarithmic", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_LOG}, 0, 0, FLAGS, "scale" },
141 { "bark", "bark", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_BARK}, 0, 0, FLAGS, "scale" },
142 { "mel", "mel", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_MEL}, 0, 0, FLAGS, "scale" },
143 { "erbs", "erbs", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_ERBS}, 0, 0, FLAGS, "scale" },
144 { "sqrt", "sqrt", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_SQRT}, 0, 0, FLAGS, "scale" },
145 { "cbrt", "cbrt", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_CBRT}, 0, 0, FLAGS, "scale" },
146 { "qdrt", "qdrt", 0, AV_OPT_TYPE_CONST,{.i64=FSCALE_QDRT}, 0, 0, FLAGS, "scale" },
147 { "iscale", "set intensity scale", OFFSET(intensity_scale),AV_OPT_TYPE_INT, {.i64=0}, 0, NB_ISCALE-1, FLAGS, "iscale" },
148 { "linear", "linear", 0, AV_OPT_TYPE_CONST,{.i64=ISCALE_LINEAR}, 0, 0, FLAGS, "iscale" },
149 { "log", "logarithmic", 0, AV_OPT_TYPE_CONST,{.i64=ISCALE_LOG}, 0, 0, FLAGS, "iscale" },
150 { "sqrt", "sqrt", 0, AV_OPT_TYPE_CONST,{.i64=ISCALE_SQRT}, 0, 0, FLAGS, "iscale" },
151 { "cbrt", "cbrt", 0, AV_OPT_TYPE_CONST,{.i64=ISCALE_CBRT}, 0, 0, FLAGS, "iscale" },
152 { "qdrt", "qdrt", 0, AV_OPT_TYPE_CONST,{.i64=ISCALE_QDRT}, 0, 0, FLAGS, "iscale" },
153 { "min", "set minimum frequency", OFFSET(minimum_frequency), AV_OPT_TYPE_FLOAT, {.dbl = 20.}, 1, 192000, FLAGS },
154 { "max", "set maximum frequency", OFFSET(maximum_frequency), AV_OPT_TYPE_FLOAT, {.dbl = 20000.}, 1, 192000, FLAGS },
155 { "imin", "set minimum intensity", OFFSET(minimum_intensity), AV_OPT_TYPE_FLOAT, {.dbl = 0.}, 0, 1, FLAGS },
156 { "imax", "set maximum intensity", OFFSET(maximum_intensity), AV_OPT_TYPE_FLOAT, {.dbl = 1.}, 0, 1, FLAGS },
157 { "logb", "set logarithmic basis", OFFSET(logarithmic_basis), AV_OPT_TYPE_FLOAT, {.dbl = 0.0001}, 0, 1, FLAGS },
158 { "deviation", "set frequency deviation", OFFSET(deviation), AV_OPT_TYPE_FLOAT, {.dbl = 1.}, 0, 100, FLAGS },
159 { "pps", "set pixels per second", OFFSET(pps), AV_OPT_TYPE_INT, {.i64 = 64}, 1, 1024, FLAGS },
160 { "mode", "set output mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=0}, 0, 4, FLAGS, "mode" },
161 { "magnitude", "magnitude", 0, AV_OPT_TYPE_CONST,{.i64=0}, 0, 0, FLAGS, "mode" },
162 { "phase", "phase", 0, AV_OPT_TYPE_CONST,{.i64=1}, 0, 0, FLAGS, "mode" },
163 { "magphase", "magnitude+phase", 0, AV_OPT_TYPE_CONST,{.i64=2}, 0, 0, FLAGS, "mode" },
164 { "channel", "color per channel", 0, AV_OPT_TYPE_CONST,{.i64=3}, 0, 0, FLAGS, "mode" },
165 { "stereo", "stereo difference", 0, AV_OPT_TYPE_CONST,{.i64=4}, 0, 0, FLAGS, "mode" },
166 { "slide", "set slide mode", OFFSET(slide), AV_OPT_TYPE_INT, {.i64=0}, 0, NB_SLIDE-1, FLAGS, "slide" },
167 { "replace", "replace", 0, AV_OPT_TYPE_CONST,{.i64=SLIDE_REPLACE},0, 0, FLAGS, "slide" },
168 { "scroll", "scroll", 0, AV_OPT_TYPE_CONST,{.i64=SLIDE_SCROLL}, 0, 0, FLAGS, "slide" },
169 { "frame", "frame", 0, AV_OPT_TYPE_CONST,{.i64=SLIDE_FRAME}, 0, 0, FLAGS, "slide" },
170 { "direction", "set direction mode", OFFSET(direction), AV_OPT_TYPE_INT, {.i64=0}, 0, NB_DIRECTION-1, FLAGS, "direction" },
171 { "lr", "left to right", 0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_LR}, 0, 0, FLAGS, "direction" },
172 { "rl", "right to left", 0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_RL}, 0, 0, FLAGS, "direction" },
173 { "ud", "up to down", 0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_UD}, 0, 0, FLAGS, "direction" },
174 { "du", "down to up", 0, AV_OPT_TYPE_CONST,{.i64=DIRECTION_DU}, 0, 0, FLAGS, "direction" },
175 { "bar", "set bar ratio", OFFSET(bar_ratio), AV_OPT_TYPE_FLOAT, {.dbl = 0.}, 0, 1, FLAGS },
176 { "rotation", "set color rotation", OFFSET(rotation), AV_OPT_TYPE_FLOAT, {.dbl = 0}, -1, 1, FLAGS },
180 AVFILTER_DEFINE_CLASS(showcwt);
182 static av_cold void uninit(AVFilterContext *ctx)
184 ShowCWTContext *s = ctx->priv;
186 av_freep(&s->frequency_band);
187 av_freep(&s->kernel_start);
188 av_freep(&s->kernel_stop);
191 av_frame_free(&s->cache);
192 av_frame_free(&s->outpicref);
193 av_frame_free(&s->fft_in);
194 av_frame_free(&s->fft_out);
195 av_frame_free(&s->dst_x);
196 av_frame_free(&s->src_x);
197 av_frame_free(&s->ifft_in);
198 av_frame_free(&s->ifft_out);
199 av_frame_free(&s->ch_out);
200 av_frame_free(&s->over);
201 av_frame_free(&s->bh_out);
204 for (int n = 0; n < s->nb_threads; n++)
205 av_tx_uninit(&s->fft[n]);
210 for (int n = 0; n < s->nb_threads; n++)
211 av_tx_uninit(&s->ifft[n]);
216 for (int n = 0; n < s->frequency_band_count; n++)
217 av_freep(&s->kernel[n]);
219 av_freep(&s->kernel);
224 static int query_formats(AVFilterContext *ctx)
226 AVFilterFormats *formats = NULL;
227 AVFilterChannelLayouts *layouts = NULL;
228 AVFilterLink *inlink = ctx->inputs[0];
229 AVFilterLink *outlink = ctx->outputs[0];
230 static const enum AVSampleFormat sample_fmts[] = { AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE };
231 static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVA444P, AV_PIX_FMT_NONE };
234 formats = ff_make_format_list(sample_fmts);
235 if ((ret = ff_formats_ref(formats, &inlink->outcfg.formats)) < 0)
238 layouts = ff_all_channel_counts();
239 if ((ret = ff_channel_layouts_ref(layouts, &inlink->outcfg.channel_layouts)) < 0)
242 formats = ff_all_samplerates();
243 if ((ret = ff_formats_ref(formats, &inlink->outcfg.samplerates)) < 0)
246 formats = ff_make_format_list(pix_fmts);
247 if ((ret = ff_formats_ref(formats, &outlink->incfg.formats)) < 0)
253 static float frequency_band(float *frequency_band,
254 int frequency_band_count,
255 float frequency_range,
256 float frequency_offset,
257 int frequency_scale, float deviation)
261 deviation = sqrtf(deviation / (4.f * M_PI)); // Heisenberg Gabor Limit
262 for (int y = 0; y < frequency_band_count; y++) {
263 float frequency = frequency_range * (1.f - (float)y / frequency_band_count) + frequency_offset;
264 float frequency_derivative = frequency_range / frequency_band_count;
266 switch (frequency_scale) {
268 frequency = powf(2.f, frequency);
269 frequency_derivative *= logf(2.f) * frequency;
272 frequency = 600.f * sinhf(frequency / 6.f);
273 frequency_derivative *= sqrtf(frequency * frequency + 360000.f) / 6.f;
276 frequency = 700.f * (powf(10.f, frequency / 2595.f) - 1.f);
277 frequency_derivative *= (frequency + 700.f) * logf(10.f) / 2595.f;
280 frequency = 676170.4f / (47.06538f - expf(frequency * 0.08950404f)) - 14678.49f;
281 frequency_derivative *= (frequency * frequency + 14990.4f * frequency + 4577850.f) / 160514.f;
284 frequency = frequency * frequency;
285 frequency_derivative *= 2.f * sqrtf(frequency);
288 frequency = frequency * frequency * frequency;
289 frequency_derivative *= 3.f * powf(frequency, 2.f / 3.f);
292 frequency = frequency * frequency * frequency * frequency;
293 frequency_derivative *= 4.f * powf(frequency, 3.f / 4.f);
297 frequency_band[y*2 ] = frequency;
298 frequency_band[y*2+1] = frequency_derivative * deviation;
300 ret = 1.f / (frequency_derivative * deviation);
306 static float remap_log(ShowCWTContext *s, float value, int iscale, float log_factor)
308 const float max = s->maximum_intensity;
309 const float min = s->minimum_intensity;
316 ret = max - expf(value / log_factor);
319 value = logf(value) * log_factor;
320 ret = max - av_clipf(value, 0.f, 1.f);
323 value = max - expf(value / log_factor);
327 value = max - expf(value / log_factor);
331 value = max - expf(value / log_factor);
332 ret = powf(value, 0.25f);
336 return av_clipf(ret, 0.f, 1.f);
339 static int run_channel_cwt_prepare(AVFilterContext *ctx, void *arg, int jobnr, int ch)
341 ShowCWTContext *s = ctx->priv;
342 const int hop_size = s->hop_size;
344 float *cache = (float *)s->cache->extended_data[ch];
345 AVComplexFloat *src = (AVComplexFloat *)s->fft_in->extended_data[ch];
346 AVComplexFloat *dst = (AVComplexFloat *)s->fft_out->extended_data[ch];
347 const int offset = (s->input_padding_size - hop_size) >> 1;
350 const float *input = (const float *)fin->extended_data[ch];
351 const int offset = s->hop_size - fin->nb_samples;
353 memmove(cache, &cache[fin->nb_samples], offset * sizeof(float));
354 memcpy(&cache[offset], input, fin->nb_samples * sizeof(float));
357 if (fin && s->hop_index + fin->nb_samples < hop_size)
360 memset(src, 0, sizeof(float) * s->fft_size);
361 for (int n = 0; n < hop_size; n++)
362 src[n+offset].re = cache[n];
364 s->tx_fn(s->fft[jobnr], dst, src, sizeof(*src));
369 #define DRAW_BAR_COLOR(x) \
376 float mul = (Y - ht) * bh[0]; \
377 dstY[x] = av_clip_uint8(lrintf(Y * mul * 255.f)); \
378 dstU[x] = av_clip_uint8(lrintf((U-0.5f) * 128.f + 128)); \
379 dstV[x] = av_clip_uint8(lrintf((V-0.5f) * 128.f + 128)); \
383 static void draw_bar(ShowCWTContext *s, int y,
384 float Y, float U, float V)
386 float *bh = ((float *)s->bh_out->extended_data[0]) + y;
387 const ptrdiff_t ylinesize = s->outpicref->linesize[0];
388 const ptrdiff_t ulinesize = s->outpicref->linesize[1];
389 const ptrdiff_t vlinesize = s->outpicref->linesize[2];
390 const int direction = s->direction;
391 const int bar_size = s->bar_size;
392 const float rcp_bar_h = 1.f / bar_size;
393 uint8_t *dstY, *dstU, *dstV;
394 const int w_1 = s->w - 1;
396 bh[0] = 1.f / (Y + 0.0001f);
399 dstY = s->outpicref->data[0] + y * ylinesize;
400 dstU = s->outpicref->data[1] + y * ulinesize;
401 dstV = s->outpicref->data[2] + y * vlinesize;
402 for (int x = 0; x < bar_size; x++) {
403 float ht = (bar_size - x) * rcp_bar_h;
408 dstY = s->outpicref->data[0] + y * ylinesize;
409 dstU = s->outpicref->data[1] + y * ulinesize;
410 dstV = s->outpicref->data[2] + y * vlinesize;
411 for (int x = 0; x < bar_size; x++) {
412 float ht = x * rcp_bar_h;
413 DRAW_BAR_COLOR(w_1 - bar_size + x);
417 dstY = s->outpicref->data[0] + w_1 - y;
418 dstU = s->outpicref->data[1] + w_1 - y;
419 dstV = s->outpicref->data[2] + w_1 - y;
420 for (int x = 0; x < bar_size; x++) {
421 float ht = (bar_size - x) * rcp_bar_h;
429 dstY = s->outpicref->data[0] + w_1 - y + ylinesize * (s->h - 1 - bar_size);
430 dstU = s->outpicref->data[1] + w_1 - y + ulinesize * (s->h - 1 - bar_size);
431 dstV = s->outpicref->data[2] + w_1 - y + vlinesize * (s->h - 1 - bar_size);
432 for (int x = 0; x < bar_size; x++) {
433 float ht = x * rcp_bar_h;
443 static int draw(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
445 ShowCWTContext *s = ctx->priv;
446 const ptrdiff_t ylinesize = s->outpicref->linesize[0];
447 const ptrdiff_t ulinesize = s->outpicref->linesize[1];
448 const ptrdiff_t vlinesize = s->outpicref->linesize[2];
449 const ptrdiff_t alinesize = s->outpicref->linesize[3];
450 const float log_factor = 1.f/logf(s->logarithmic_basis);
451 const int count = s->frequency_band_count;
452 const int start = (count * jobnr) / nb_jobs;
453 const int end = (count * (jobnr+1)) / nb_jobs;
454 const int nb_channels = s->nb_channels;
455 const int iscale = s->intensity_scale;
456 const int ihop_index = s->ihop_index;
457 const int ihop_size = s->ihop_size;
458 const float rotation = s->rotation;
459 const int direction = s->direction;
460 uint8_t *dstY, *dstU, *dstV, *dstA;
461 const int bar_size = s->bar_size;
462 const int mode = s->mode;
463 const int w_1 = s->w - 1;
464 const int x = s->pos;
467 for (int y = start; y < end; y++) {
468 const AVComplexFloat *src = ((const AVComplexFloat *)s->ch_out->extended_data[y]) +
469 0 * ihop_size + ihop_index;
474 dstY = s->outpicref->data[0] + y * ylinesize;
475 dstU = s->outpicref->data[1] + y * ulinesize;
476 dstV = s->outpicref->data[2] + y * vlinesize;
477 dstA = s->outpicref->data[3] ? s->outpicref->data[3] + y * alinesize : NULL;
481 dstY = s->outpicref->data[0] + x * ylinesize + w_1 - y;
482 dstU = s->outpicref->data[1] + x * ulinesize + w_1 - y;
483 dstV = s->outpicref->data[2] + x * vlinesize + w_1 - y;
484 dstA = s->outpicref->data[3] ? s->outpicref->data[3] + x * alinesize + w_1 - y : NULL;
491 /* nothing to do here */
494 switch (s->direction) {
496 memmove(dstY, dstY + 1, w_1);
497 memmove(dstU, dstU + 1, w_1);
498 memmove(dstV, dstV + 1, w_1);
500 memmove(dstA, dstA + 1, w_1);
503 memmove(dstY + 1, dstY, w_1);
504 memmove(dstU + 1, dstU, w_1);
505 memmove(dstV + 1, dstV, w_1);
507 memmove(dstA + 1, dstA, w_1);
513 if (direction == DIRECTION_RL ||
514 direction == DIRECTION_LR) {
525 const AVComplexFloat *src2 = (nb_channels > 1) ? src + ihop_size: src;
528 z = hypotf(src[0].re + src2[0].re, src[0].im + src2[0].im);
529 u = hypotf(src[0].re, src[0].im);
530 v = hypotf(src2[0].re, src2[0].im);
532 z = remap_log(s, z, iscale, log_factor);
533 u = remap_log(s, u, iscale, log_factor);
534 v = remap_log(s, v, iscale, log_factor);
537 U = sinf((v - u) * M_PI_2);
538 V = sinf((u - v) * M_PI_2);
540 u = U * cosf(rotation * M_PI) - V * sinf(rotation * M_PI);
541 v = U * sinf(rotation * M_PI) + V * cosf(rotation * M_PI);
543 U = 0.5f + 0.5f * z * u;
544 V = 0.5f + 0.5f * z * v;
546 dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
547 dstU[0] = av_clip_uint8(lrintf(U * 255.f));
548 dstV[0] = av_clip_uint8(lrintf(V * 255.f));
553 draw_bar(s, y, Y, U, V);
558 const int nb_channels = s->nb_channels;
559 const float yf = 1.f / nb_channels;
563 for (int ch = 0; ch < nb_channels; ch++) {
564 const AVComplexFloat *srcn = src + ihop_size * ch;
567 z = hypotf(srcn[0].re, srcn[0].im);
568 z = remap_log(s, z, iscale, log_factor);
571 U += z * yf * sinf(2.f * M_PI * (ch * yf + rotation));
572 V += z * yf * cosf(2.f * M_PI * (ch * yf + rotation));
575 dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
576 dstU[0] = av_clip_uint8(lrintf(U * 255.f));
577 dstV[0] = av_clip_uint8(lrintf(V * 255.f));
582 draw_bar(s, y, Y, U, V);
586 Y = hypotf(src[0].re, src[0].im);
587 Y = remap_log(s, Y, iscale, log_factor);
588 U = atan2f(src[0].im, src[0].re);
589 U = 0.5f + 0.5f * U * Y / M_PI;
592 dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
593 dstU[0] = av_clip_uint8(lrintf(U * 255.f));
594 dstV[0] = av_clip_uint8(lrintf(V * 255.f));
598 draw_bar(s, y, Y, U, V);
601 Y = atan2f(src[0].im, src[0].re);
602 Y = 0.5f + 0.5f * Y / M_PI;
604 dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
608 draw_bar(s, y, Y, 0.5f, 0.5f);
611 Y = hypotf(src[0].re, src[0].im);
612 Y = remap_log(s, Y, iscale, log_factor);
614 dstY[0] = av_clip_uint8(lrintf(Y * 255.f));
619 draw_bar(s, y, Y, 0.5f, 0.5f);
627 static int run_channel_cwt(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
629 ShowCWTContext *s = ctx->priv;
630 const int ch = *(int *)arg;
631 const AVComplexFloat *fft_out = (const AVComplexFloat *)s->fft_out->extended_data[ch];
632 AVComplexFloat *isrc = (AVComplexFloat *)s->ifft_in->extended_data[jobnr];
633 AVComplexFloat *idst = (AVComplexFloat *)s->ifft_out->extended_data[jobnr];
634 const int output_padding_size = s->output_padding_size;
635 const int input_padding_size = s->input_padding_size;
636 const float scale = 1.f / input_padding_size;
637 const int ihop_size = s->ihop_size;
638 const int count = s->frequency_band_count;
639 const int start = (count * jobnr) / nb_jobs;
640 const int end = (count * (jobnr+1)) / nb_jobs;
642 for (int y = start; y < end; y++) {
643 AVComplexFloat *chout = ((AVComplexFloat *)s->ch_out->extended_data[y]) + ch * ihop_size;
644 AVComplexFloat *over = ((AVComplexFloat *)s->over->extended_data[ch]) + y * ihop_size;
645 AVComplexFloat *dstx = (AVComplexFloat *)s->dst_x->extended_data[jobnr];
646 AVComplexFloat *srcx = (AVComplexFloat *)s->src_x->extended_data[jobnr];
647 const AVComplexFloat *kernel = s->kernel[y];
648 const unsigned *index = (const unsigned *)s->index;
649 const int kernel_start = s->kernel_start[y];
650 const int kernel_stop = s->kernel_stop[y];
651 const int kernel_range = kernel_stop - kernel_start + 1;
654 if (kernel_start >= 0) {
656 memcpy(srcx, fft_out + kernel_start, sizeof(*fft_out) * kernel_range);
658 offset = -kernel_start;
659 memcpy(srcx+offset, fft_out, sizeof(*fft_out) * (kernel_range-offset));
660 memcpy(srcx, fft_out+input_padding_size-offset, sizeof(*fft_out)*offset);
663 s->fdsp->vector_fmul_scalar((float *)srcx, (const float *)srcx, scale, FFALIGN(kernel_range * 2, 4));
664 s->fdsp->vector_fmul((float *)dstx, (const float *)srcx,
665 (const float *)kernel, FFALIGN(kernel_range * 2, 16));
667 memset(isrc, 0, sizeof(*isrc) * output_padding_size);
669 for (int i = 0; i < kernel_range; i++) {
670 const unsigned n = index[i + kernel_start];
672 isrc[n].re += dstx[i].re;
673 isrc[n].im += dstx[i].im;
676 for (int i = 0; i < kernel_range; i++) {
677 const unsigned n = (i-kernel_start) & (output_padding_size-1);
679 isrc[n].re += dstx[i].re;
680 isrc[n].im += dstx[i].im;
684 s->itx_fn(s->ifft[jobnr], idst, isrc, sizeof(*isrc));
686 memcpy(chout, idst, sizeof(*chout) * ihop_size);
687 for (int n = 0; n < ihop_size; n++) {
688 chout[n].re += over[n].re;
689 chout[n].im += over[n].im;
691 memcpy(over, idst + ihop_size, sizeof(*over) * ihop_size);
697 static int compute_kernel(AVFilterContext *ctx)
699 ShowCWTContext *s = ctx->priv;
700 const int size = s->input_padding_size;
701 const int output_sample_count = s->output_sample_count;
702 const int fsize = s->frequency_band_count;
703 int *kernel_start = s->kernel_start;
704 int *kernel_stop = s->kernel_stop;
705 unsigned *index = s->index;
706 int range_min = INT_MAX;
707 int range_max = 0, ret = 0;
710 tkernel = av_malloc_array(size, sizeof(*tkernel));
712 return AVERROR(ENOMEM);
714 for (int y = 0; y < fsize; y++) {
715 AVComplexFloat *kernel = s->kernel[y];
716 int start = INT_MIN, stop = INT_MAX;
717 const float frequency = s->frequency_band[y*2];
718 const float deviation = 1.f / (s->frequency_band[y*2+1] *
719 output_sample_count);
720 const int a = FFMAX(frequency-12.f*sqrtf(1.f/deviation)-0.5f, -size);
721 const int b = FFMIN(frequency+12.f*sqrtf(1.f/deviation)-0.5f, size+a);
722 const int range = -a;
724 memset(tkernel, 0, size * sizeof(*tkernel));
725 for (int n = a; n < b; n++) {
726 float ff, f = n+0.5f-frequency;
728 ff = expf(-f*f*deviation);
729 tkernel[n+range] = ff;
732 for (int n = a; n < b; n++) {
733 if (tkernel[n+range] != 0.f) {
734 if (tkernel[n+range] > FLT_MIN)
735 av_log(ctx, AV_LOG_DEBUG, "out of range kernel %g\n", tkernel[n+range]);
741 for (int n = b; n >= a; n--) {
742 if (tkernel[n+range] != 0.f) {
743 if (tkernel[n+range] > FLT_MIN)
744 av_log(ctx, AV_LOG_DEBUG, "out of range kernel %g\n", tkernel[n+range]);
750 if (start == INT_MIN || stop == INT_MAX) {
751 ret = AVERROR(EINVAL);
755 kernel_start[y] = start;
756 kernel_stop[y] = stop;
758 kernel = av_calloc(FFALIGN(stop-start+1, 16), sizeof(*kernel));
760 ret = AVERROR(ENOMEM);
764 for (int n = 0; n <= stop - start; n++) {
765 kernel[n].re = tkernel[n+range+start];
766 kernel[n].im = tkernel[n+range+start];
769 range_min = FFMIN(range_min, stop+1-start);
770 range_max = FFMAX(range_max, stop+1-start);
772 s->kernel[y] = kernel;
775 for (int n = 0; n < size; n++)
776 index[n] = n & (s->output_padding_size - 1);
778 av_log(ctx, AV_LOG_DEBUG, "range_min: %d\n", range_min);
779 av_log(ctx, AV_LOG_DEBUG, "range_max: %d\n", range_max);
786 static int config_output(AVFilterLink *outlink)
788 AVFilterContext *ctx = outlink->src;
789 AVFilterLink *inlink = ctx->inputs[0];
790 ShowCWTContext *s = ctx->priv;
791 float maximum_frequency = fminf(s->maximum_frequency, inlink->sample_rate * 0.5f);
792 float minimum_frequency = s->minimum_frequency;
793 float scale = 1.f, factor;
796 if (minimum_frequency >= maximum_frequency) {
797 av_log(ctx, AV_LOG_ERROR, "min frequency (%f) >= (%f) max frequency\n",
798 minimum_frequency, maximum_frequency);
799 return AVERROR(EINVAL);
804 s->fdsp = avpriv_float_dsp_alloc(0);
806 return AVERROR(ENOMEM);
808 switch (s->direction) {
811 s->bar_size = s->w * s->bar_ratio;
812 s->frequency_band_count = s->h;
816 s->bar_size = s->h * s->bar_ratio;
817 s->frequency_band_count = s->w;
821 switch (s->frequency_scale) {
823 minimum_frequency = logf(minimum_frequency) / logf(2.f);
824 maximum_frequency = logf(maximum_frequency) / logf(2.f);
827 minimum_frequency = 6.f * asinhf(minimum_frequency / 600.f);
828 maximum_frequency = 6.f * asinhf(maximum_frequency / 600.f);
831 minimum_frequency = 2595.f * log10f(1.f + minimum_frequency / 700.f);
832 maximum_frequency = 2595.f * log10f(1.f + maximum_frequency / 700.f);
835 minimum_frequency = 11.17268f * logf(1.f + (46.06538f * minimum_frequency) / (minimum_frequency + 14678.49f));
836 maximum_frequency = 11.17268f * logf(1.f + (46.06538f * maximum_frequency) / (maximum_frequency + 14678.49f));
839 minimum_frequency = sqrtf(minimum_frequency);
840 maximum_frequency = sqrtf(maximum_frequency);
843 minimum_frequency = cbrtf(minimum_frequency);
844 maximum_frequency = cbrtf(maximum_frequency);
847 minimum_frequency = powf(minimum_frequency, 0.25f);
848 maximum_frequency = powf(maximum_frequency, 0.25f);
852 s->frequency_band = av_calloc(s->frequency_band_count,
853 sizeof(*s->frequency_band) * 2);
854 if (!s->frequency_band)
855 return AVERROR(ENOMEM);
857 s->nb_consumed_samples = inlink->sample_rate *
858 frequency_band(s->frequency_band,
859 s->frequency_band_count, maximum_frequency - minimum_frequency,
860 minimum_frequency, s->frequency_scale, s->deviation);
861 s->nb_consumed_samples = FFMIN(s->nb_consumed_samples, 65536);
863 s->nb_threads = FFMIN(s->frequency_band_count, ff_filter_get_nb_threads(ctx));
864 s->nb_channels = inlink->ch_layout.nb_channels;
865 s->old_pts = AV_NOPTS_VALUE;
866 s->eof_pts = AV_NOPTS_VALUE;
868 s->input_sample_count = 1 << (32 - ff_clz(s->nb_consumed_samples));
869 s->input_padding_size = 1 << (32 - ff_clz(s->input_sample_count));
870 s->output_sample_count = FFMAX(1, av_rescale(s->input_sample_count, s->pps, inlink->sample_rate));
871 s->output_padding_size = 1 << (32 - ff_clz(s->output_sample_count));
873 s->hop_size = s->input_sample_count;
874 s->ihop_size = s->output_padding_size >> 1;
878 outlink->sample_aspect_ratio = (AVRational){1,1};
880 s->fft_size = FFALIGN(s->input_padding_size, av_cpu_max_align());
881 s->ifft_size = FFALIGN(s->output_padding_size, av_cpu_max_align());
883 s->fft = av_calloc(s->nb_threads, sizeof(*s->fft));
885 return AVERROR(ENOMEM);
887 for (int n = 0; n < s->nb_threads; n++) {
888 ret = av_tx_init(&s->fft[n], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->input_padding_size, &scale, 0);
893 s->ifft = av_calloc(s->nb_threads, sizeof(*s->ifft));
895 return AVERROR(ENOMEM);
897 for (int n = 0; n < s->nb_threads; n++) {
898 ret = av_tx_init(&s->ifft[n], &s->itx_fn, AV_TX_FLOAT_FFT, 1, s->output_padding_size, &scale, 0);
903 s->outpicref = ff_get_video_buffer(outlink, outlink->w, outlink->h);
904 s->fft_in = ff_get_audio_buffer(inlink, s->fft_size * 2);
905 s->fft_out = ff_get_audio_buffer(inlink, s->fft_size * 2);
906 s->dst_x = av_frame_alloc();
907 s->src_x = av_frame_alloc();
908 s->kernel = av_calloc(s->frequency_band_count, sizeof(*s->kernel));
909 s->cache = ff_get_audio_buffer(inlink, s->hop_size);
910 s->over = ff_get_audio_buffer(inlink, s->frequency_band_count * 2 * s->ihop_size);
911 s->bh_out = ff_get_audio_buffer(inlink, s->frequency_band_count);
912 s->ifft_in = av_frame_alloc();
913 s->ifft_out = av_frame_alloc();
914 s->ch_out = av_frame_alloc();
915 s->index = av_calloc(s->input_padding_size, sizeof(*s->index));
916 s->kernel_start = av_calloc(s->frequency_band_count, sizeof(*s->kernel_start));
917 s->kernel_stop = av_calloc(s->frequency_band_count, sizeof(*s->kernel_stop));
918 if (!s->outpicref || !s->fft_in || !s->fft_out || !s->src_x || !s->dst_x || !s->over ||
919 !s->ifft_in || !s->ifft_out || !s->kernel_start || !s->kernel_stop || !s->ch_out ||
920 !s->cache || !s->index || !s->bh_out || !s->kernel)
921 return AVERROR(ENOMEM);
923 s->ch_out->format = inlink->format;
924 s->ch_out->nb_samples = 2 * s->ihop_size * inlink->ch_layout.nb_channels;
925 s->ch_out->ch_layout.nb_channels = s->frequency_band_count;
926 ret = av_frame_get_buffer(s->ch_out, 0);
930 s->ifft_in->format = inlink->format;
931 s->ifft_in->nb_samples = s->ifft_size * 2;
932 s->ifft_in->ch_layout.nb_channels = s->nb_threads;
933 ret = av_frame_get_buffer(s->ifft_in, 0);
937 s->ifft_out->format = inlink->format;
938 s->ifft_out->nb_samples = s->ifft_size * 2;
939 s->ifft_out->ch_layout.nb_channels = s->nb_threads;
940 ret = av_frame_get_buffer(s->ifft_out, 0);
944 s->src_x->format = inlink->format;
945 s->src_x->nb_samples = s->fft_size * 2;
946 s->src_x->ch_layout.nb_channels = s->nb_threads;
947 ret = av_frame_get_buffer(s->src_x, 0);
951 s->dst_x->format = inlink->format;
952 s->dst_x->nb_samples = s->fft_size * 2;
953 s->dst_x->ch_layout.nb_channels = s->nb_threads;
954 ret = av_frame_get_buffer(s->dst_x, 0);
958 s->outpicref->sample_aspect_ratio = (AVRational){1,1};
960 for (int y = 0; y < outlink->h; y++) {
961 memset(s->outpicref->data[0] + y * s->outpicref->linesize[0], 0, outlink->w);
962 memset(s->outpicref->data[1] + y * s->outpicref->linesize[1], 128, outlink->w);
963 memset(s->outpicref->data[2] + y * s->outpicref->linesize[2], 128, outlink->w);
964 if (s->outpicref->data[3])
965 memset(s->outpicref->data[3] + y * s->outpicref->linesize[3], 0, outlink->w);
968 s->outpicref->color_range = AVCOL_RANGE_JPEG;
970 factor = s->input_padding_size / (float)inlink->sample_rate;
971 for (int n = 0; n < s->frequency_band_count; n++) {
972 s->frequency_band[2*n ] *= factor;
973 s->frequency_band[2*n+1] *= factor;
976 av_log(ctx, AV_LOG_DEBUG, "factor: %f\n", factor);
977 av_log(ctx, AV_LOG_DEBUG, "nb_consumed_samples: %d\n", s->nb_consumed_samples);
978 av_log(ctx, AV_LOG_DEBUG, "hop_size: %d\n", s->hop_size);
979 av_log(ctx, AV_LOG_DEBUG, "ihop_size: %d\n", s->ihop_size);
980 av_log(ctx, AV_LOG_DEBUG, "input_sample_count: %d\n", s->input_sample_count);
981 av_log(ctx, AV_LOG_DEBUG, "input_padding_size: %d\n", s->input_padding_size);
982 av_log(ctx, AV_LOG_DEBUG, "output_sample_count: %d\n", s->output_sample_count);
983 av_log(ctx, AV_LOG_DEBUG, "output_padding_size: %d\n", s->output_padding_size);
985 switch (s->direction) {
987 s->pos = s->bar_size;
990 s->pos = FFMAX(0, s->w - 2 - s->bar_size);
993 s->pos = s->bar_size;
996 s->pos = FFMAX(0, s->h - 2 - s->bar_size);
1000 s->auto_frame_rate = av_make_q(inlink->sample_rate, s->hop_size);
1001 if (strcmp(s->rate_str, "auto")) {
1002 ret = av_parse_video_rate(&s->frame_rate, s->rate_str);
1004 s->frame_rate = s->auto_frame_rate;
1006 outlink->frame_rate = s->frame_rate;
1007 outlink->time_base = av_inv_q(outlink->frame_rate);
1009 ret = compute_kernel(ctx);
1016 static int output_frame(AVFilterContext *ctx)
1018 AVFilterLink *outlink = ctx->outputs[0];
1019 AVFilterLink *inlink = ctx->inputs[0];
1020 ShowCWTContext *s = ctx->priv;
1021 const int nb_planes = 3 + (s->outpicref->data[3] != NULL);
1026 switch (s->direction) {
1028 for (int p = 0; p < nb_planes; p++) {
1029 ptrdiff_t linesize = s->outpicref->linesize[p];
1031 for (int y = s->h - 1; y > s->bar_size; y--) {
1032 uint8_t *dst = s->outpicref->data[p] + y * linesize;
1034 memmove(dst, dst - linesize, s->w);
1039 for (int p = 0; p < nb_planes; p++) {
1040 ptrdiff_t linesize = s->outpicref->linesize[p];
1042 for (int y = 0; y < s->h - 2 - s->bar_size; y++) {
1043 uint8_t *dst = s->outpicref->data[p] + y * linesize;
1045 memmove(dst, dst + linesize, s->w);
1053 ff_filter_execute(ctx, draw, NULL, NULL, s->nb_threads);
1058 switch (s->direction) {
1061 if (s->pos >= s->w) {
1062 s->pos = s->bar_size;
1069 s->pos = FFMAX(0, s->w - 2 - s->bar_size);
1075 if (s->pos >= s->h) {
1076 s->pos = s->bar_size;
1083 s->pos = FFMAX(0, s->h - 2 - s->bar_size);
1090 switch (s->direction) {
1093 s->pos = s->bar_size;
1096 s->pos = FFMAX(0, s->w - 2 - s->bar_size);
1099 s->pos = FFMAX(0, s->h - 2 - s->bar_size);
1105 if (s->slide == SLIDE_FRAME && s->eof) {
1106 switch (s->direction) {
1108 for (int p = 0; p < nb_planes; p++) {
1109 ptrdiff_t linesize = s->outpicref->linesize[p];
1110 const int size = s->w - s->pos;
1111 const int fill = p > 0 && p < 3 ? 128 : 0;
1112 const int x = s->pos;
1114 for (int y = 0; y < s->h; y++) {
1115 uint8_t *dst = s->outpicref->data[p] + y * linesize + x;
1117 memset(dst, fill, size);
1122 for (int p = 0; p < nb_planes; p++) {
1123 ptrdiff_t linesize = s->outpicref->linesize[p];
1124 const int size = s->w - s->pos;
1125 const int fill = p > 0 && p < 3 ? 128 : 0;
1127 for (int y = 0; y < s->h; y++) {
1128 uint8_t *dst = s->outpicref->data[p] + y * linesize;
1130 memset(dst, fill, size);
1135 for (int p = 0; p < nb_planes; p++) {
1136 ptrdiff_t linesize = s->outpicref->linesize[p];
1137 const int fill = p > 0 && p < 3 ? 128 : 0;
1139 for (int y = s->pos; y < s->h; y++) {
1140 uint8_t *dst = s->outpicref->data[p] + y * linesize;
1142 memset(dst, fill, s->w);
1147 for (int p = 0; p < nb_planes; p++) {
1148 ptrdiff_t linesize = s->outpicref->linesize[p];
1149 const int fill = p > 0 && p < 3 ? 128 : 0;
1151 for (int y = s->h - s->pos; y >= 0; y--) {
1152 uint8_t *dst = s->outpicref->data[p] + y * linesize;
1154 memset(dst, fill, s->w);
1161 s->new_frame = s->slide == SLIDE_FRAME && (s->new_frame || s->eof);
1163 if (s->slide != SLIDE_FRAME || s->new_frame == 1) {
1164 int64_t pts_offset = s->new_frame ? 0LL : av_rescale(s->ihop_index, s->hop_size, s->ihop_size);
1165 const int offset = (s->input_padding_size - s->hop_size) >> 1;
1167 pts_offset = av_rescale_q(pts_offset - offset, av_make_q(1, inlink->sample_rate), inlink->time_base);
1168 s->outpicref->pts = av_rescale_q(s->in_pts + pts_offset, inlink->time_base, outlink->time_base);
1169 s->outpicref->duration = 1;
1173 if (s->ihop_index >= s->ihop_size)
1174 s->ihop_index = s->hop_index = 0;
1176 if (s->slide == SLIDE_FRAME && s->new_frame == 0)
1179 if (s->old_pts < s->outpicref->pts) {
1180 AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
1182 return AVERROR(ENOMEM);
1183 ret = av_frame_copy_props(out, s->outpicref);
1186 ret = av_frame_copy(out, s->outpicref);
1189 s->old_pts = s->outpicref->pts;
1191 ret = ff_filter_frame(outlink, out);
1195 av_frame_free(&out);
1202 static int run_channels_cwt_prepare(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
1204 ShowCWTContext *s = ctx->priv;
1205 const int count = s->nb_channels;
1206 const int start = (count * jobnr) / nb_jobs;
1207 const int end = (count * (jobnr+1)) / nb_jobs;
1209 for (int ch = start; ch < end; ch++)
1210 run_channel_cwt_prepare(ctx, arg, jobnr, ch);
1215 static int activate(AVFilterContext *ctx)
1217 AVFilterLink *inlink = ctx->inputs[0];
1218 AVFilterLink *outlink = ctx->outputs[0];
1219 ShowCWTContext *s = ctx->priv;
1220 int ret = 0, status;
1223 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
1226 AVFrame *fin = NULL;
1228 if (s->hop_index < s->hop_size) {
1230 ret = ff_inlink_consume_samples(inlink, 1, s->hop_size - s->hop_index, &fin);
1235 if (ret > 0 || s->eof) {
1236 ff_filter_execute(ctx, run_channels_cwt_prepare, fin, NULL,
1237 FFMIN(s->nb_threads, s->nb_channels));
1239 if (s->hop_index == 0)
1240 s->in_pts = fin->pts;
1241 s->hop_index += fin->nb_samples;
1242 av_frame_free(&fin);
1244 s->hop_index = s->hop_size;
1249 if (s->hop_index >= s->hop_size || s->ihop_index > 0) {
1250 for (int ch = 0; ch < s->nb_channels && s->ihop_index == 0; ch++) {
1251 ff_filter_execute(ctx, run_channel_cwt, (void *)&ch, NULL,
1255 ret = output_frame(ctx);
1262 if (s->slide == SLIDE_FRAME)
1263 ret = output_frame(ctx);
1264 ff_outlink_set_status(outlink, AVERROR_EOF, s->eof_pts);
1268 if (!s->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
1269 if (status == AVERROR_EOF) {
1271 ff_filter_set_ready(ctx, 10);
1272 s->eof_pts = av_rescale_q(pts, inlink->time_base, outlink->time_base);
1277 if (ff_inlink_queued_samples(inlink) > 0 || s->ihop_index ||
1278 s->hop_index >= s->hop_size || s->eof) {
1279 ff_filter_set_ready(ctx, 10);
1283 if (ff_outlink_frame_wanted(outlink)) {
1284 ff_inlink_request_frame(inlink);
1288 return FFERROR_NOT_READY;
1291 static const AVFilterPad showcwt_outputs[] = {
1294 .type = AVMEDIA_TYPE_VIDEO,
1295 .config_props = config_output,
1299 const AVFilter ff_avf_showcwt = {
1301 .description = NULL_IF_CONFIG_SMALL("Convert input audio to a CWT (Continuous Wavelet Transform) spectrum video output."),
1303 .priv_size = sizeof(ShowCWTContext),
1304 FILTER_INPUTS(ff_audio_default_filterpad),
1305 FILTER_OUTPUTS(showcwt_outputs),
1306 FILTER_QUERY_FUNC(query_formats),
1307 .activate = activate,
1308 .priv_class = &showcwt_class,
1309 .flags = AVFILTER_FLAG_SLICE_THREADS,