rnnoise: Support various sample rates

author Jaechul Lee <jcsing.lee@samsung.com>

Wed, 29 Nov 2023 04:47:50 +0000 (13:47 +0900)

committer Jaechul Lee <jcsing.lee@samsung.com>

Tue, 19 Dec 2023 02:33:52 +0000 (11:33 +0900)
author Jaechul Lee <jcsing.lee@samsung.com>
Wed, 29 Nov 2023 04:47:50 +0000 (13:47 +0900)
committer Jaechul Lee <jcsing.lee@samsung.com>
Tue, 19 Dec 2023 02:33:52 +0000 (11:33 +0900)
diff --git a/meson.build b/meson.build

index 8dc5442cd724d90d12bb434941e55f452f1adaf9..73ea86bdb9b2e6b4f188e173ea428daabc73b23d 100644 (file)
--- a/meson.build
+++ b/meson.build
@@ -40,7 +40,7 @@ audio_effect_shared = shared_library('audio-effect',
  if get_option('ns-rnnoise').enabled()
  shared_library('audio-effect-ns-rnnoise',
    sources: [ 'src/plugin_ns_rnnoise.c' ],
-  dependencies: [ dependency('rnnoise'), platform_dep ],
+  dependencies: [ dependency('rnnoise'), dependency('speexdsp'), platform_dep ],
    include_directories: audio_effect_include_dir,
    link_with: [audio_effect_shared],
    install: true,
diff --git a/packaging/libaudio-effect.spec b/packaging/libaudio-effect.spec

index 1a465276b546b062ce820da917067d0ced14be01..f30e4374586d4c69212563cfdd70feb637da1294 100644 (file)
--- a/packaging/libaudio-effect.spec
+++ b/packaging/libaudio-effect.spec
@@ -1,6 +1,6 @@
  Name:       libaudio-effect
  Summary:    audio effect library
-Version:    0.0.19
+Version:    0.0.20
  Release:    0
  Group:      System/Libraries
  License:    Apache-2.0
diff --git a/src/plugin_ns_rnnoise.c b/src/plugin_ns_rnnoise.c

index 1bf3a1c55dceaf7ebb6f290621f8c60e6c46483a..423e16f442ca6376c59b140f139f02eb8b83518a 100644 (file)
--- a/src/plugin_ns_rnnoise.c
+++ b/src/plugin_ns_rnnoise.c
@@ -22,33 +22,59 @@
  #include <audio_effect_util.h>
  
  #include <rnnoise.h>
+#include <speex/speex_resampler.h>
  
-#define FIXED_FRAME_SIZE       480
-#define MAX_CHANNELS           2
+#define WORKING_SAMPLERATE     48000
+#define MAX_CHANNELS   2
+
+#define FIXED_FRAME_SIZE_MSEC  10
+#define WORKING_FRAMESIZE      480
+#define RESAMPLER_QUALITY      10
+
+enum {
+       RESAMPLER_FROM_WORKING_RATE,
+       RESAMPLER_TO_WORKING_RATE,
+       RESAMPLER_MAX,
+};
+
+struct resampler_info_s {
+       SpeexResamplerState *resampler[RESAMPLER_MAX];
+       size_t from_frames;
+       size_t to_frames;
+       short *resample_buf;
+};
  
  struct userdata {
         DenoiseState *st[MAX_CHANNELS];
         int rate;
         int channels;
         audio_effect_format_e format;
-       size_t frames;
+       size_t support_framesize;
  
         float *fbuffer;
         float *d_fbuffer[MAX_CHANNELS];
+
+       struct resampler_info_s *resampler_info;
+       void (*process_func)(struct userdata *, short *, short *);
  };
  
+static size_t get_framesize_ms(int rate, int ms)
+{
+       return (size_t)(rate / (1000 / ms));
+}
+
  static void ns_rnnoise_process_mono(struct userdata *u, short *in, short *out)
  {
         short *ptr = in;
         int i;
  
-       for (i = 0; i < u->frames; i++)
+       for (i = 0; i < u->support_framesize; i++)
                 u->fbuffer[i] = ptr[i];
  
         rnnoise_process_frame(u->st[0], u->fbuffer, u->fbuffer);
  
         ptr = out;
-       for (i = 0; i < u->frames; i++)
+       for (i = 0; i < u->support_framesize; i++)
                 ptr[i] = u->fbuffer[i];
  }
  
@@ -60,40 +86,145 @@ static void ns_rnnoise_process_stereo(struct userdata *u, short *in, short *out)
  
         assert(u->channels > 1);
  
-       for (i = 0; i < u->frames * u->channels; i++)
+       for (i = 0; i < u->support_framesize * u->channels; i++)
                 u->fbuffer[i] = ptr[i];
  
-       audio_effect_util_deinterleave(u->fbuffer, (void **)u->d_fbuffer, u->channels, float_sample_size, u->frames);
+       audio_effect_util_deinterleave(u->fbuffer, (void **)u->d_fbuffer, u->channels, float_sample_size, u->support_framesize);
  
         for (i = 0; i < u->channels; i++)
                 rnnoise_process_frame(u->st[i], u->d_fbuffer[i], u->d_fbuffer[i]);
  
-       audio_effect_util_interleave((const void **)u->d_fbuffer, u->fbuffer, u->channels, float_sample_size, u->frames);
+       audio_effect_util_interleave((const void **)u->d_fbuffer, u->fbuffer, u->channels, float_sample_size, u->support_framesize);
  
         ptr = out;
-       for (i = 0; i < u->frames * u->channels; i++)
+       for (i = 0; i < u->support_framesize * u->channels; i++)
                 ptr[i] = u->fbuffer[i];
  }
  
+static void ns_rnnoise_process_auto(struct userdata *u, short *in, short *out)
+{
+       spx_uint32_t in_frames;
+       spx_uint32_t out_frames;
+
+       assert(u);
+       assert(in);
+       assert(out);
+       assert(u->process_func);
+
+       if (!u->resampler_info) {
+               u->process_func(u, (short *)in, (short *)out);
+               return;
+       }
+
+       in_frames = u->resampler_info->from_frames;
+       out_frames = u->resampler_info->to_frames;
+
+       assert(!speex_resampler_process_interleaved_int(u->resampler_info->resampler[RESAMPLER_TO_WORKING_RATE],
+                                               (const spx_int16_t *)in, (spx_uint32_t *)&in_frames,
+                                               (spx_int16_t *)u->resampler_info->resample_buf, (spx_uint32_t *)&out_frames));
+
+       u->process_func(u, (short *)u->resampler_info->resample_buf, (short *)u->resampler_info->resample_buf);
+
+       in_frames = u->resampler_info->to_frames;
+       out_frames = u->resampler_info->from_frames;
+
+       assert(!speex_resampler_process_interleaved_int(u->resampler_info->resampler[RESAMPLER_FROM_WORKING_RATE],
+                                               (const spx_int16_t *)u->resampler_info->resample_buf, (spx_uint32_t *)&in_frames,
+                                               (spx_int16_t *)out, (spx_uint32_t *)&out_frames));
+}
+
+static void ns_rnnoise_destroy_resampler_info(struct resampler_info_s *resampler_info)
+{
+       int i;
+       SpeexResamplerState **resampler;
+
+       if (!resampler_info)
+               return;
+
+       resampler = resampler_info->resampler;
+
+       for (i = 0; i < RESAMPLER_MAX; i++) {
+               if (resampler[i]) {
+                       speex_resampler_destroy(resampler[i]);
+                       resampler[i] = NULL;
+               }
+       }
+
+       if (resampler_info->resample_buf)
+               free(resampler_info->resample_buf);
+
+       free(resampler_info);
+}
+
+static int ns_rnnoise_create_resampler_info(struct resampler_info_s **resampler_info, int rate, int channels)
+{
+       struct resampler_info_s *_resampler_info;
+       SpeexResamplerState **resampler;
+
+       assert(resampler_info);
+
+       if (rate == WORKING_SAMPLERATE)
+               return 0;
+
+       _resampler_info = (struct resampler_info_s *)calloc(1, sizeof(struct resampler_info_s));
+       if (!_resampler_info) {
+               LOG_ERROR("Failed to allocate resampler info");
+               return -1;
+       }
+
+       resampler = _resampler_info->resampler;
+
+       resampler[RESAMPLER_FROM_WORKING_RATE] = speex_resampler_init(channels, WORKING_SAMPLERATE, rate, RESAMPLER_QUALITY, NULL);
+       if (!resampler[RESAMPLER_FROM_WORKING_RATE]) {
+               LOG_ERROR("Failed to allocate resampler. rate(%d->%d)", WORKING_SAMPLERATE, rate);
+               goto fail;
+       }
+       speex_resampler_skip_zeros(resampler[RESAMPLER_FROM_WORKING_RATE]);
+
+       resampler[RESAMPLER_TO_WORKING_RATE] = speex_resampler_init(channels, rate, WORKING_SAMPLERATE, RESAMPLER_QUALITY, NULL);
+       if (!resampler[RESAMPLER_TO_WORKING_RATE]) {
+               LOG_ERROR("Failed to allocate resampler. rate(%d->%d)", rate, WORKING_SAMPLERATE);
+               goto fail;
+       }
+       speex_resampler_skip_zeros(resampler[RESAMPLER_TO_WORKING_RATE]);
+
+       /* 10ms */
+       _resampler_info->from_frames = get_framesize_ms(rate, FIXED_FRAME_SIZE_MSEC);
+       _resampler_info->to_frames = WORKING_FRAMESIZE;
+       _resampler_info->resample_buf = (short *)malloc(_resampler_info->to_frames * sizeof(short) * channels);
+       if (!_resampler_info->resample_buf) {
+               LOG_ERROR("Failed to allocate resampled buffer");
+               goto fail;
+       }
+
+       *resampler_info = _resampler_info;
+
+       LOG_INFO("Created resampler. from_frames(%zu), to_frames(%zu)", _resampler_info->from_frames, _resampler_info->to_frames);
+
+       return 0;
+
+fail:
+       ns_rnnoise_destroy_resampler_info(_resampler_info);
+
+       return -1;
+}
+
  static void *ns_rnnoise_create(int rate, int channels, audio_effect_format_e format, size_t frames)
  {
         struct userdata *u;
         int i;
  
+       if (frames != get_framesize_ms(rate, FIXED_FRAME_SIZE_MSEC))
+               LOG_WARN("Not support rate(%d), frames(%zu).", rate, frames);
+
         u = (struct userdata *)calloc(1, sizeof(struct userdata));
  
         u->rate = rate;
         u->channels = channels;
         u->format = format;
+       u->support_framesize = WORKING_FRAMESIZE;
  
-       if (frames != FIXED_FRAME_SIZE) {
-               LOG_WARN("Not support frames(%zu). It will be set to fixed frame_size(%u)",
-                               frames, FIXED_FRAME_SIZE);
-       }
-
-       u->frames = FIXED_FRAME_SIZE;
-
-       u->fbuffer = (float *)malloc(sizeof(float) * u->frames * channels);
+       u->fbuffer = (float *)malloc(sizeof(float) * channels * u->support_framesize);
         if (!u->fbuffer) {
                 LOG_ERROR("Failed to allocate fbuffer");
                 goto fail;
@@ -110,7 +241,7 @@ static void *ns_rnnoise_create(int rate, int channels, audio_effect_format_e for
         /* it doesn't need d_fbuffer in case of mono */
         if (u->channels > 1) {
                 for (i = 0; i < u->channels; i++) {
-                       u->d_fbuffer[i] = (float *)malloc(sizeof(float) * u->frames);
+                       u->d_fbuffer[i] = (float *)malloc(sizeof(float) * u->support_framesize);
                         if (!u->d_fbuffer[i]) {
                                 LOG_ERROR("Failed to allocate fbuffer");
                                 goto fail;
@@ -118,8 +249,14 @@ static void *ns_rnnoise_create(int rate, int channels, audio_effect_format_e for
                 }
         }
  
-       LOG_INFO("plugin rnnoise init. rate(%d), channels(%d), format(%d), frames(%zu)",
-                               rate, channels, format, u->frames);
+       if (ns_rnnoise_create_resampler_info(&u->resampler_info, rate, channels) < 0) {
+               LOG_ERROR("Failed to create resampler. rate(%d), channels(%d)", rate, channels);
+               goto fail;
+       }
+
+       u->process_func = u->channels == 1 ? ns_rnnoise_process_mono : ns_rnnoise_process_stereo;
+
+       LOG_INFO("plugin rnnoise init. rate(%d), channels(%d), format(%d)", rate, channels, format);
  
         return u;
  
@@ -134,6 +271,8 @@ fail:
         if (u->fbuffer)
                 free(u->fbuffer);
  
+       ns_rnnoise_destroy_resampler_info(u->resampler_info);
+
         free(u);
  
         return NULL;
@@ -147,10 +286,7 @@ static int ns_rnnoise_process(void *priv, char *in, char *out)
         assert(in);
         assert(out);
  
-       if (u->channels == 1)
-               ns_rnnoise_process_mono(u, (short *)in, (short *)out);
-       else
-               ns_rnnoise_process_stereo(u, (short *)in, (short *)out);
+       ns_rnnoise_process_auto(u, (short *)in, (short *)out);
  
         return 0;
  }
@@ -179,6 +315,8 @@ static void ns_rnnoise_destroy(void *priv)
                 u->fbuffer = NULL;
         }
  
+       ns_rnnoise_destroy_resampler_info(u->resampler_info);
+
         free(u);
  
         u = NULL;
@@ -193,8 +331,8 @@ static audio_effect_plugin_info_s ns_rnnoise_desc = {
                 .destroy = ns_rnnoise_destroy,
         },
         .constraint = {
-               .frames = FIXED_FRAME_SIZE,
-               .min_rate = 48000,
+               .frames_msec = FIXED_FRAME_SIZE_MSEC,
+               .min_rate = 16000,
                 .max_rate = 48000,
                 .min_channels = 1,
                 .max_channels = MAX_CHANNELS,
diff --git a/test/meson.build b/test/meson.build

index f54f9af90fa9d141fcbc03f4b584a4c0d97f9ea8..ad0897c366d2aa09a8f5d6336945448e42e5be73 100644 (file)
--- a/test/meson.build
+++ b/test/meson.build
@@ -3,15 +3,16 @@
  resource_file_list = [ [ 'SPEECH_FILE_NAME', 'speech.raw' ],
                          [ 'SPEECH_STEREO_FILE_NAME', 'speech_2ch.raw' ],
                          [ 'SPEECH_48K_STEREO_FILE_NAME', 'speech_48k_2ch.raw' ],
-                        [ 'NOISE_48K_FILE_NAME', 'noise_48k.raw' ],
-                        [ 'NOISE_48K_STEREO_FILE_NAME', 'noise_48k_2ch.raw' ],
+                        [ 'NOISE_48K_FILE_NAME', 'noise_48k_1ch.raw' ],
+                        [ 'NOISE_441K_MONO_FILE_NAME', 'noise_441k_1ch.raw' ],
+                        [ 'NOISE_441K_STEREO_FILE_NAME', 'noise_441k_2ch.raw' ],
                          [ 'AEC_REFERENCE_FILE_NAME', 'rec.raw' ],
                          [ 'AEC_RECORDING_FILE_NAME', 'ref.raw' ],
                          [ 'AEC_REFERENCE_COPY_5CH_FILE_NAME', 'rec_refcopy_5ch.raw' ] ]
  
  cdata = configuration_data()
  foreach r : resource_file_list
-  configure_file(input: join_paths('resources/', r[1]), output: r[1], copy: false)
+  configure_file(input: join_paths('resources/', r[1]), output: r[1], copy: true)
    cdata.set_quoted(r[0], r[1])
  endforeach
  configure_file(output : 'resources.h', configuration : cdata)
diff --git a/test/ns_rnnoise_test.c b/test/ns_rnnoise_test.c

index cf4120079ad751d675d63de7c3cae441dd73d13c..868317d18607a3db9feb2d39bca8bfc755fae6df 100644 (file)
--- a/test/ns_rnnoise_test.c
+++ b/test/ns_rnnoise_test.c
@@ -5,67 +5,75 @@
  #include "audio_effect.h"
  #include "resources.h"
  
-#define MAX_CHANNELS 2
-#define FIXED_FRAME_SIZE 480
-#define FRAME_SIZE FIXED_FRAME_SIZE * MAX_CHANNELS
-
-#define OUTPUT_FILE_NAME               "output_rnnoise.raw"
-#define OUTPUT_STEREO_FILE_NAME        "output_rnnoise_2ch.raw"
+#define OUTPUT_441K_1CH_FILE_NAME              "output_rnnoise_441k_1ch.raw"
+#define OUTPUT_441K_2CH_FILE_NAME              "output_rnnoise_441k_2ch.raw"
+#define OUTPUT_48K_FILE_NAME                   "output_rnnoise_48k_1ch.raw"
  
  int main(void)
  {
         audio_effect_s *ae;
         FILE *fin;
         FILE *fout;
-       short in[FRAME_SIZE];
-       short out[FRAME_SIZE];
-       size_t framesize = FRAME_SIZE;
-
+       short *in;
+       short *out;
+       size_t framesize;
         int i;
-       int n = 0;
  
-       int channels[MAX_CHANNELS] = { 1, 2 };
-       char *test_file[] = { NOISE_48K_FILE_NAME, NOISE_48K_STEREO_FILE_NAME };
-       char *out_file[] = { OUTPUT_FILE_NAME, OUTPUT_STEREO_FILE_NAME };
+       int channels[] = { 1, 2, 1 };
+       int rate[] = { 44100, 44100, 48000 };
+       int frame_size[] = { 441, 441, 480 };
+       char *test_file[] = { NOISE_441K_MONO_FILE_NAME, NOISE_441K_STEREO_FILE_NAME, NOISE_48K_FILE_NAME };
+       char *output_file[] = { OUTPUT_441K_1CH_FILE_NAME, OUTPUT_441K_2CH_FILE_NAME, OUTPUT_48K_FILE_NAME };
  
         printf("--- rnnoise start ---\n");
  
-       for (i = 0; i < MAX_CHANNELS; i++) {
+       for (i = 0; i < sizeof(channels)/sizeof(channels[0]); i++) {
                 fin = fopen(test_file[i], "r");
                 if (!fin) {
-                       printf("failed to find airport_48k.raw\n");
+                       printf("failed to open. file(%s)\n", test_file[i]);
                         exit(-1);
                 }
  
-               fout = fopen(out_file[i], "wb");
+               fout = fopen(output_file[i], "wb");
                 if (!fout) {
-                       printf("failed to open raw\n");
+                       printf("failed to open file(%s)\n", output_file[i]);
+                       exit(-1);
+               }
+
+               in = (short *)malloc(frame_size[i] * channels[i] * sizeof(short));
+               if (!in) {
+                       printf("failed to allocate in\n");
                         exit(-1);
                 }
  
-               ae = audio_effect_create(AUDIO_EFFECT_METHOD_NS_RNNOISE, 48000, channels[i], AUDIO_EFFECT_FORMAT_S16, framesize);
+               out = (short *)malloc(frame_size[i]* channels[i] * sizeof(short));
+               if (!out) {
+                       printf("failed to allocate out\n");
+                       exit(-1);
+               }
+
+               ae = audio_effect_create(AUDIO_EFFECT_METHOD_NS_RNNOISE, rate[i], channels[i], AUDIO_EFFECT_FORMAT_S16, frame_size[i]);
                 assert(ae);
  
                 framesize = audio_effect_get_process_framesize(ae);
-               assert(framesize == FIXED_FRAME_SIZE);
                 printf("frame size %zu\n", framesize);
  
                 while (!feof(fin)) {
-                       if (fread(in, sizeof(short) * channels[i], framesize, fin) < 0)
+                       if (fread(in, sizeof(short) * channels[i] * framesize, 1, fin) < 0)
                                 break;
  
-                       printf("#%d frame. ", n++);
-
                         if (audio_effect_process(ae, in, out) < 0) {
                                 printf("(failed!)\n");
                         } else {
-                               printf("(success!)\n");
-                               fwrite(out, sizeof(short) * channels[i], framesize, fout);
+                               fwrite(out, sizeof(short) * channels[i] * framesize, 1, fout);
                         }
                 }
  
                 audio_effect_destroy(ae);
  
+               free(in);
+               free(out);
+
                 fclose(fin);
                 fclose(fout);
         }
diff --git a/test/resources/noise_441k_1ch.raw b/test/resources/noise_441k_1ch.raw

new file mode 100644 (file)

index 0000000..24db566

Binary files /dev/null and b/test/resources/noise_441k_1ch.raw differ
diff --git a/test/resources/noise_441k_2ch.raw b/test/resources/noise_441k_2ch.raw

new file mode 100644 (file)

index 0000000..2ace046

Binary files /dev/null and b/test/resources/noise_441k_2ch.raw differ
diff --git a/test/resources/noise_48k.raw b/test/resources/noise_48k.raw

deleted file mode 100644 (file)

index 790f343..0000000

Binary files a/test/resources/noise_48k.raw and /dev/null differ
diff --git a/test/resources/noise_48k_1ch.raw b/test/resources/noise_48k_1ch.raw

new file mode 100644 (file)

index 0000000..dfcf3b7

Binary files /dev/null and b/test/resources/noise_48k_1ch.raw differ
diff --git a/test/resources/noise_48k_2ch.raw b/test/resources/noise_48k_2ch.raw

deleted file mode 100644 (file)

index f2bab8a..0000000

Binary files a/test/resources/noise_48k_2ch.raw and /dev/null differ
author	Jaechul Lee <jcsing.lee@samsung.com>
	Wed, 29 Nov 2023 04:47:50 +0000 (13:47 +0900)
committer	Jaechul Lee <jcsing.lee@samsung.com>
	Tue, 19 Dec 2023 02:33:52 +0000 (11:33 +0900)
meson.build		patch | blob | history
packaging/libaudio-effect.spec		patch | blob | history
src/plugin_ns_rnnoise.c		patch | blob | history
test/meson.build		patch | blob | history
test/ns_rnnoise_test.c		patch | blob | history
test/resources/noise_441k_1ch.raw	[new file with mode: 0644]	patch | blob
test/resources/noise_441k_2ch.raw	[new file with mode: 0644]	patch | blob
test/resources/noise_48k.raw	[deleted file]	patch | blob | history
test/resources/noise_48k_1ch.raw	[new file with mode: 0644]	patch | blob
test/resources/noise_48k_2ch.raw	[deleted file]	patch | blob | history