rnnoise: Support various samplerate 83/301983/10 accepted/tizen_unified_riscv accepted/tizen/unified/20231221.165305 accepted/tizen/unified/riscv/20231226.210919
author Jaechul Lee <jcsing.lee@samsung.com>
Wed, 29 Nov 2023 04:47:50 +0000 (13:47 +0900)
committer Jaechul Lee <jcsing.lee@samsung.com>
Tue, 19 Dec 2023 02:33:52 +0000 (11:33 +0900)
[Version] 0.0.20
[Issue Type] Update

Change-Id: Ib2b2ef072bd77d8811f365cdc81ab1eba7e7e9d8
Signed-off-by: Jaechul Lee <jcsing.lee@samsung.com>
meson.build
packaging/libaudio-effect.spec
src/plugin_ns_rnnoise.c
test/meson.build
test/ns_rnnoise_test.c
test/resources/noise_441k_1ch.raw [new file with mode: 0644]
test/resources/noise_441k_2ch.raw [new file with mode: 0644]
test/resources/noise_48k.raw [deleted file]
test/resources/noise_48k_1ch.raw [new file with mode: 0644]
test/resources/noise_48k_2ch.raw [deleted file]

index 8dc5442cd724d90d12bb434941e55f452f1adaf9..73ea86bdb9b2e6b4f188e173ea428daabc73b23d 100644 (file)
@@ -40,7 +40,7 @@ audio_effect_shared = shared_library('audio-effect',
 if get_option('ns-rnnoise').enabled()
 shared_library('audio-effect-ns-rnnoise',
   sources: [ 'src/plugin_ns_rnnoise.c' ],
-  dependencies: [ dependency('rnnoise'), platform_dep ],
+  dependencies: [ dependency('rnnoise'), dependency('speexdsp'), platform_dep ],
   include_directories: audio_effect_include_dir,
   link_with: [audio_effect_shared],
   install: true,
index 1a465276b546b062ce820da917067d0ced14be01..f30e4374586d4c69212563cfdd70feb637da1294 100644 (file)
@@ -1,6 +1,6 @@
 Name:       libaudio-effect
 Summary:    audio effect library
-Version:    0.0.19
+Version:    0.0.20
 Release:    0
 Group:      System/Libraries
 License:    Apache-2.0
index 1bf3a1c55dceaf7ebb6f290621f8c60e6c46483a..423e16f442ca6376c59b140f139f02eb8b83518a 100644 (file)
 #include <audio_effect_util.h>
 
 #include <rnnoise.h>
+#include <speex/speex_resampler.h>
 
-#define FIXED_FRAME_SIZE       480
-#define MAX_CHANNELS           2
+#define WORKING_SAMPLERATE     48000
+#define MAX_CHANNELS   2
+
+#define FIXED_FRAME_SIZE_MSEC  10
+#define WORKING_FRAMESIZE      480
+#define RESAMPLER_QUALITY      10
+
+enum { /* slots of resampler_info_s.resampler[] */
+       RESAMPLER_FROM_WORKING_RATE, /* resampler created as WORKING_SAMPLERATE -> stream rate */
+       RESAMPLER_TO_WORKING_RATE, /* resampler created as stream rate -> WORKING_SAMPLERATE */
+       RESAMPLER_MAX, /* slot count; used as the array length */
+};
+
+struct resampler_info_s { /* resampling state, created only when the stream rate differs from WORKING_SAMPLERATE */
+       SpeexResamplerState *resampler[RESAMPLER_MAX]; /* indexed by RESAMPLER_FROM_WORKING_RATE / RESAMPLER_TO_WORKING_RATE */
+       size_t from_frames; /* frames per FIXED_FRAME_SIZE_MSEC block at the stream rate */
+       size_t to_frames; /* frames per block at the working rate (WORKING_FRAMESIZE) */
+       short *resample_buf; /* interleaved scratch buffer of to_frames * channels samples */
+};
 
 struct userdata {
        DenoiseState *st[MAX_CHANNELS];
        int rate;
        int channels;
        audio_effect_format_e format;
-       size_t frames;
+       size_t support_framesize;
 
        float *fbuffer;
        float *d_fbuffer[MAX_CHANNELS];
+
+       struct resampler_info_s *resampler_info;
+       void (*process_func)(struct userdata *, short *, short *);
 };
 
+/*
+ * Return the number of frames that span `ms` milliseconds at `rate` Hz.
+ *
+ * Computed as rate * ms / 1000 (rounded down) instead of rate / (1000 / ms):
+ * the latter divides by zero for ms == 0 or ms > 1000. The result is unchanged
+ * for every `ms` that divides 1000 (e.g. the 10 ms used by this plugin).
+ * A non-positive rate or duration yields 0.
+ */
+static size_t get_framesize_ms(int rate, int ms)
+{
+       if (rate <= 0 || ms <= 0)
+               return 0;
+
+       /* widen before multiplying so rate * ms cannot overflow int */
+       return (size_t)(((long long)rate * ms) / 1000);
+}
+
 static void ns_rnnoise_process_mono(struct userdata *u, short *in, short *out)
 {
        short *ptr = in;
        int i;
 
-       for (i = 0; i < u->frames; i++)
+       for (i = 0; i < u->support_framesize; i++)
                u->fbuffer[i] = ptr[i];
 
        rnnoise_process_frame(u->st[0], u->fbuffer, u->fbuffer);
 
        ptr = out;
-       for (i = 0; i < u->frames; i++)
+       for (i = 0; i < u->support_framesize; i++)
                ptr[i] = u->fbuffer[i];
 }
 
@@ -60,40 +86,145 @@ static void ns_rnnoise_process_stereo(struct userdata *u, short *in, short *out)
 
        assert(u->channels > 1);
 
-       for (i = 0; i < u->frames * u->channels; i++)
+       for (i = 0; i < u->support_framesize * u->channels; i++)
                u->fbuffer[i] = ptr[i];
 
-       audio_effect_util_deinterleave(u->fbuffer, (void **)u->d_fbuffer, u->channels, float_sample_size, u->frames);
+       audio_effect_util_deinterleave(u->fbuffer, (void **)u->d_fbuffer, u->channels, float_sample_size, u->support_framesize);
 
        for (i = 0; i < u->channels; i++)
                rnnoise_process_frame(u->st[i], u->d_fbuffer[i], u->d_fbuffer[i]);
 
-       audio_effect_util_interleave((const void **)u->d_fbuffer, u->fbuffer, u->channels, float_sample_size, u->frames);
+       audio_effect_util_interleave((const void **)u->d_fbuffer, u->fbuffer, u->channels, float_sample_size, u->support_framesize);
 
        ptr = out;
-       for (i = 0; i < u->frames * u->channels; i++)
+       for (i = 0; i < u->support_framesize * u->channels; i++)
                ptr[i] = u->fbuffer[i];
 }
 
+/*
+ * Denoise one block of interleaved 16-bit audio, resampling around the
+ * denoiser when a resampler_info exists.
+ *
+ * Without resampler_info, `in` is handed straight to u->process_func.
+ * Otherwise `in` (from_frames frames per channel) is resampled into
+ * resample_buf, processed there in place, and resampled back into `out`.
+ *
+ * The resampler calls are deliberately NOT wrapped in assert(): when NDEBUG
+ * is defined the assert() argument is not evaluated, which would silently
+ * drop the resampling itself. A resampler error is logged and the block is
+ * abandoned (the function has no return value to report it through).
+ */
+static void ns_rnnoise_process_auto(struct userdata *u, short *in, short *out)
+{
+       struct resampler_info_s *info;
+       spx_uint32_t in_frames;
+       spx_uint32_t out_frames;
+       int err;
+
+       assert(u);
+       assert(in);
+       assert(out);
+       assert(u->process_func);
+
+       info = u->resampler_info;
+       if (!info) {
+               u->process_func(u, in, out);
+               return;
+       }
+
+       /* stream rate -> working rate; lengths are per channel and updated by the call */
+       in_frames = (spx_uint32_t)info->from_frames;
+       out_frames = (spx_uint32_t)info->to_frames;
+
+       err = speex_resampler_process_interleaved_int(info->resampler[RESAMPLER_TO_WORKING_RATE],
+                                               (const spx_int16_t *)in, &in_frames,
+                                               (spx_int16_t *)info->resample_buf, &out_frames);
+       if (err != RESAMPLER_ERR_SUCCESS) {
+               LOG_ERROR("Failed to resample to working rate. err(%d)", err);
+               return;
+       }
+
+       /* denoise in place inside the scratch buffer */
+       u->process_func(u, info->resample_buf, info->resample_buf);
+
+       /* working rate -> stream rate */
+       in_frames = (spx_uint32_t)info->to_frames;
+       out_frames = (spx_uint32_t)info->from_frames;
+
+       err = speex_resampler_process_interleaved_int(info->resampler[RESAMPLER_FROM_WORKING_RATE],
+                                               (const spx_int16_t *)info->resample_buf, &in_frames,
+                                               (spx_int16_t *)out, &out_frames);
+       if (err != RESAMPLER_ERR_SUCCESS)
+               LOG_ERROR("Failed to resample from working rate. err(%d)", err);
+}
+
+/*
+ * Release a resampler_info object: every resampler that was created, the
+ * scratch buffer, and the object itself. Passing NULL is a no-op.
+ */
+static void ns_rnnoise_destroy_resampler_info(struct resampler_info_s *resampler_info)
+{
+       int slot;
+
+       if (resampler_info == NULL)
+               return;
+
+       for (slot = 0; slot < RESAMPLER_MAX; slot++) {
+               /* a slot may be empty if creation failed part-way */
+               if (resampler_info->resampler[slot] == NULL)
+                       continue;
+
+               speex_resampler_destroy(resampler_info->resampler[slot]);
+               resampler_info->resampler[slot] = NULL;
+       }
+
+       /* free(NULL) is a no-op, so no guard is needed */
+       free(resampler_info->resample_buf);
+       free(resampler_info);
+}
+
+/*
+ * Create the resampler pair and scratch buffer needed when `rate` differs
+ * from WORKING_SAMPLERATE.
+ *
+ * Returns 0 on success. *resampler_info is set to the new object, or left
+ * untouched when rate == WORKING_SAMPLERATE (nothing is created).
+ * Returns -1 on failure; anything partially created is released.
+ */
+static int ns_rnnoise_create_resampler_info(struct resampler_info_s **resampler_info, int rate, int channels)
+{
+       struct resampler_info_s *_resampler_info;
+       SpeexResamplerState **resampler;
+
+       assert(resampler_info);
+
+       if (rate == WORKING_SAMPLERATE)
+               return 0;
+
+       /* zero-initialised so the fail path can tell which members exist */
+       _resampler_info = calloc(1, sizeof(*_resampler_info));
+       if (!_resampler_info) {
+               LOG_ERROR("Failed to allocate resampler info");
+               return -1;
+       }
+
+       resampler = _resampler_info->resampler;
+
+       resampler[RESAMPLER_FROM_WORKING_RATE] = speex_resampler_init(channels, WORKING_SAMPLERATE, rate, RESAMPLER_QUALITY, NULL);
+       if (!resampler[RESAMPLER_FROM_WORKING_RATE]) {
+               LOG_ERROR("Failed to allocate resampler. rate(%d->%d)", WORKING_SAMPLERATE, rate);
+               goto fail;
+       }
+       speex_resampler_skip_zeros(resampler[RESAMPLER_FROM_WORKING_RATE]);
+
+       resampler[RESAMPLER_TO_WORKING_RATE] = speex_resampler_init(channels, rate, WORKING_SAMPLERATE, RESAMPLER_QUALITY, NULL);
+       if (!resampler[RESAMPLER_TO_WORKING_RATE]) {
+               LOG_ERROR("Failed to allocate resampler. rate(%d->%d)", rate, WORKING_SAMPLERATE);
+               goto fail;
+       }
+       speex_resampler_skip_zeros(resampler[RESAMPLER_TO_WORKING_RATE]);
+
+       /* 10ms */
+       _resampler_info->from_frames = get_framesize_ms(rate, FIXED_FRAME_SIZE_MSEC);
+       _resampler_info->to_frames = WORKING_FRAMESIZE;
+
+       /*
+        * Zero-filled on purpose: a resampler call may produce fewer than
+        * to_frames frames, and the tail of this buffer is still handed to
+        * the denoiser, so it must not hold indeterminate values.
+        */
+       _resampler_info->resample_buf = calloc(_resampler_info->to_frames * (size_t)channels, sizeof(*_resampler_info->resample_buf));
+       if (!_resampler_info->resample_buf) {
+               LOG_ERROR("Failed to allocate resampled buffer");
+               goto fail;
+       }
+
+       *resampler_info = _resampler_info;
+
+       LOG_INFO("Created resampler. from_frames(%zu), to_frames(%zu)", _resampler_info->from_frames, _resampler_info->to_frames);
+
+       return 0;
+
+fail:
+       ns_rnnoise_destroy_resampler_info(_resampler_info);
+
+       return -1;
+}
+
 static void *ns_rnnoise_create(int rate, int channels, audio_effect_format_e format, size_t frames)
 {
        struct userdata *u;
        int i;
 
+       if (frames != get_framesize_ms(rate, FIXED_FRAME_SIZE_MSEC))
+               LOG_WARN("Not support rate(%d), frames(%zu).", rate, frames);
+
        u = (struct userdata *)calloc(1, sizeof(struct userdata));
 
        u->rate = rate;
        u->channels = channels;
        u->format = format;
+       u->support_framesize = WORKING_FRAMESIZE;
 
-       if (frames != FIXED_FRAME_SIZE) {
-               LOG_WARN("Not support frames(%zu). It will be set to fixed frame_size(%u)",
-                               frames, FIXED_FRAME_SIZE);
-       }
-
-       u->frames = FIXED_FRAME_SIZE;
-
-       u->fbuffer = (float *)malloc(sizeof(float) * u->frames * channels);
+       u->fbuffer = (float *)malloc(sizeof(float) * channels * u->support_framesize);
        if (!u->fbuffer) {
                LOG_ERROR("Failed to allocate fbuffer");
                goto fail;
@@ -110,7 +241,7 @@ static void *ns_rnnoise_create(int rate, int channels, audio_effect_format_e for
        /* it doesn't need d_fbuffer in case of mono */
        if (u->channels > 1) {
                for (i = 0; i < u->channels; i++) {
-                       u->d_fbuffer[i] = (float *)malloc(sizeof(float) * u->frames);
+                       u->d_fbuffer[i] = (float *)malloc(sizeof(float) * u->support_framesize);
                        if (!u->d_fbuffer[i]) {
                                LOG_ERROR("Failed to allocate fbuffer");
                                goto fail;
@@ -118,8 +249,14 @@ static void *ns_rnnoise_create(int rate, int channels, audio_effect_format_e for
                }
        }
 
-       LOG_INFO("plugin rnnoise init. rate(%d), channels(%d), format(%d), frames(%zu)",
-                               rate, channels, format, u->frames);
+       if (ns_rnnoise_create_resampler_info(&u->resampler_info, rate, channels) < 0) {
+               LOG_ERROR("Failed to create resampler. rate(%d), channels(%d)", rate, channels);
+               goto fail;
+       }
+
+       u->process_func = u->channels == 1 ? ns_rnnoise_process_mono : ns_rnnoise_process_stereo;
+
+       LOG_INFO("plugin rnnoise init. rate(%d), channels(%d), format(%d)", rate, channels, format);
 
        return u;
 
@@ -134,6 +271,8 @@ fail:
        if (u->fbuffer)
                free(u->fbuffer);
 
+       ns_rnnoise_destroy_resampler_info(u->resampler_info);
+
        free(u);
 
        return NULL;
@@ -147,10 +286,7 @@ static int ns_rnnoise_process(void *priv, char *in, char *out)
        assert(in);
        assert(out);
 
-       if (u->channels == 1)
-               ns_rnnoise_process_mono(u, (short *)in, (short *)out);
-       else
-               ns_rnnoise_process_stereo(u, (short *)in, (short *)out);
+       ns_rnnoise_process_auto(u, (short *)in, (short *)out);
 
        return 0;
 }
@@ -179,6 +315,8 @@ static void ns_rnnoise_destroy(void *priv)
                u->fbuffer = NULL;
        }
 
+       ns_rnnoise_destroy_resampler_info(u->resampler_info);
+
        free(u);
 
        u = NULL;
@@ -193,8 +331,8 @@ static audio_effect_plugin_info_s ns_rnnoise_desc = {
                .destroy = ns_rnnoise_destroy,
        },
        .constraint = {
-               .frames = FIXED_FRAME_SIZE,
-               .min_rate = 48000,
+               .frames_msec = FIXED_FRAME_SIZE_MSEC,
+               .min_rate = 16000,
                .max_rate = 48000,
                .min_channels = 1,
                .max_channels = MAX_CHANNELS,
index f54f9af90fa9d141fcbc03f4b584a4c0d97f9ea8..ad0897c366d2aa09a8f5d6336945448e42e5be73 100644 (file)
@@ -3,15 +3,16 @@
 resource_file_list = [ [ 'SPEECH_FILE_NAME', 'speech.raw' ],
                         [ 'SPEECH_STEREO_FILE_NAME', 'speech_2ch.raw' ],
                         [ 'SPEECH_48K_STEREO_FILE_NAME', 'speech_48k_2ch.raw' ],
-                        [ 'NOISE_48K_FILE_NAME', 'noise_48k.raw' ],
-                        [ 'NOISE_48K_STEREO_FILE_NAME', 'noise_48k_2ch.raw' ],
+                        [ 'NOISE_48K_FILE_NAME', 'noise_48k_1ch.raw' ],
+                        [ 'NOISE_441K_MONO_FILE_NAME', 'noise_441k_1ch.raw' ],
+                        [ 'NOISE_441K_STEREO_FILE_NAME', 'noise_441k_2ch.raw' ],
                         [ 'AEC_REFERENCE_FILE_NAME', 'rec.raw' ],
                         [ 'AEC_RECORDING_FILE_NAME', 'ref.raw' ],
                         [ 'AEC_REFERENCE_COPY_5CH_FILE_NAME', 'rec_refcopy_5ch.raw' ] ]
 
 cdata = configuration_data()
 foreach r : resource_file_list
-  configure_file(input: join_paths('resources/', r[1]), output: r[1], copy: false)
+  configure_file(input: join_paths('resources/', r[1]), output: r[1], copy: true)
   cdata.set_quoted(r[0], r[1])
 endforeach
 configure_file(output : 'resources.h', configuration : cdata)
index cf4120079ad751d675d63de7c3cae441dd73d13c..868317d18607a3db9feb2d39bca8bfc755fae6df 100644 (file)
@@ -5,67 +5,75 @@
 #include "audio_effect.h"
 #include "resources.h"
 
-#define MAX_CHANNELS 2
-#define FIXED_FRAME_SIZE 480
-#define FRAME_SIZE FIXED_FRAME_SIZE * MAX_CHANNELS
-
-#define OUTPUT_FILE_NAME               "output_rnnoise.raw"
-#define OUTPUT_STEREO_FILE_NAME        "output_rnnoise_2ch.raw"
+#define OUTPUT_441K_1CH_FILE_NAME              "output_rnnoise_441k_1ch.raw"
+#define OUTPUT_441K_2CH_FILE_NAME              "output_rnnoise_441k_2ch.raw"
+#define OUTPUT_48K_FILE_NAME                   "output_rnnoise_48k_1ch.raw"
 
 int main(void)
 {
        audio_effect_s *ae;
        FILE *fin;
        FILE *fout;
-       short in[FRAME_SIZE];
-       short out[FRAME_SIZE];
-       size_t framesize = FRAME_SIZE;
-
+       short *in;
+       short *out;
+       size_t framesize;
        int i;
-       int n = 0;
 
-       int channels[MAX_CHANNELS] = { 1, 2 };
-       char *test_file[] = { NOISE_48K_FILE_NAME, NOISE_48K_STEREO_FILE_NAME };
-       char *out_file[] = { OUTPUT_FILE_NAME, OUTPUT_STEREO_FILE_NAME };
+       int channels[] = { 1, 2, 1 };
+       int rate[] = { 44100, 44100, 48000 };
+       int frame_size[] = { 441, 441, 480 };
+       char *test_file[] = { NOISE_441K_MONO_FILE_NAME, NOISE_441K_STEREO_FILE_NAME, NOISE_48K_FILE_NAME };
+       char *output_file[] = { OUTPUT_441K_1CH_FILE_NAME, OUTPUT_441K_2CH_FILE_NAME, OUTPUT_48K_FILE_NAME };
 
        printf("--- rnnoise start ---\n");
 
-       for (i = 0; i < MAX_CHANNELS; i++) {
+       for (i = 0; i < sizeof(channels)/sizeof(channels[0]); i++) {
                fin = fopen(test_file[i], "r");
                if (!fin) {
-                       printf("failed to find airport_48k.raw\n");
+                       printf("failed to open. file(%s)\n", test_file[i]);
                        exit(-1);
                }
 
-               fout = fopen(out_file[i], "wb");
+               fout = fopen(output_file[i], "wb");
                if (!fout) {
-                       printf("failed to open raw\n");
+                       printf("failed to open file(%s)\n", output_file[i]);
+                       exit(-1);
+               }
+
+               in = (short *)malloc(frame_size[i] * channels[i] * sizeof(short));
+               if (!in) {
+                       printf("failed to allocate in\n");
                        exit(-1);
                }
 
-               ae = audio_effect_create(AUDIO_EFFECT_METHOD_NS_RNNOISE, 48000, channels[i], AUDIO_EFFECT_FORMAT_S16, framesize);
+               out = (short *)malloc(frame_size[i]* channels[i] * sizeof(short));
+               if (!out) {
+                       printf("failed to allocate out\n");
+                       exit(-1);
+               }
+
+               ae = audio_effect_create(AUDIO_EFFECT_METHOD_NS_RNNOISE, rate[i], channels[i], AUDIO_EFFECT_FORMAT_S16, frame_size[i]);
                assert(ae);
 
                framesize = audio_effect_get_process_framesize(ae);
-               assert(framesize == FIXED_FRAME_SIZE);
                printf("frame size %zu\n", framesize);
 
                while (!feof(fin)) {
-                       if (fread(in, sizeof(short) * channels[i], framesize, fin) < 0)
+                       if (fread(in, sizeof(short) * channels[i] * framesize, 1, fin) < 0)
                                break;
 
-                       printf("#%d frame. ", n++);
-
                        if (audio_effect_process(ae, in, out) < 0) {
                                printf("(failed!)\n");
                        } else {
-                               printf("(success!)\n");
-                               fwrite(out, sizeof(short) * channels[i], framesize, fout);
+                               fwrite(out, sizeof(short) * channels[i] * framesize, 1, fout);
                        }
                }
 
                audio_effect_destroy(ae);
 
+               free(in);
+               free(out);
+
                fclose(fin);
                fclose(fout);
        }
diff --git a/test/resources/noise_441k_1ch.raw b/test/resources/noise_441k_1ch.raw
new file mode 100644 (file)
index 0000000..24db566
Binary files /dev/null and b/test/resources/noise_441k_1ch.raw differ
diff --git a/test/resources/noise_441k_2ch.raw b/test/resources/noise_441k_2ch.raw
new file mode 100644 (file)
index 0000000..2ace046
Binary files /dev/null and b/test/resources/noise_441k_2ch.raw differ
diff --git a/test/resources/noise_48k.raw b/test/resources/noise_48k.raw
deleted file mode 100644 (file)
index 790f343..0000000
Binary files a/test/resources/noise_48k.raw and /dev/null differ
diff --git a/test/resources/noise_48k_1ch.raw b/test/resources/noise_48k_1ch.raw
new file mode 100644 (file)
index 0000000..dfcf3b7
Binary files /dev/null and b/test/resources/noise_48k_1ch.raw differ
diff --git a/test/resources/noise_48k_2ch.raw b/test/resources/noise_48k_2ch.raw
deleted file mode 100644 (file)
index f2bab8a..0000000
Binary files a/test/resources/noise_48k_2ch.raw and /dev/null differ