#include <audio_effect_util.h>
#include <rnnoise.h>
+#include <speex/speex_resampler.h>
-#define FIXED_FRAME_SIZE 480
-#define MAX_CHANNELS 2
+#define WORKING_SAMPLERATE 48000 /* Hz; a stream at any other rate gets a resampler pair */
+#define MAX_CHANNELS 2 /* size of the per-channel arrays in struct userdata */
+
+#define FIXED_FRAME_SIZE_MSEC 10 /* block duration in ms, used to derive frame counts from a rate */
+#define WORKING_FRAMESIZE 480 /* frames per block at the working rate (48000 * 10 / 1000) */
+#define RESAMPLER_QUALITY 10 /* quality argument passed to speex_resampler_init() */
+
+enum { /* indices into resampler_info_s.resampler[] */
+ RESAMPLER_FROM_WORKING_RATE, /* created as WORKING_SAMPLERATE -> stream rate */
+ RESAMPLER_TO_WORKING_RATE, /* created as stream rate -> WORKING_SAMPLERATE */
+ RESAMPLER_MAX, /* number of entries above; used as the array size */
+};
+
+struct resampler_info_s { /* resampling state; only allocated when rate != WORKING_SAMPLERATE */
+ SpeexResamplerState *resampler[RESAMPLER_MAX]; /* one resampler per direction, indexed by RESAMPLER_* */
+ size_t from_frames; /* frames per FIXED_FRAME_SIZE_MSEC block at the stream rate */
+ size_t to_frames; /* frames per block at the working rate; set to WORKING_FRAMESIZE */
+ short *resample_buf; /* scratch for to_frames * channels S16 samples at the working rate */
+};
struct userdata { /* per-instance state of the rnnoise effect */
DenoiseState *st[MAX_CHANNELS]; /* rnnoise state, one per channel */
int rate; /* sample rate given at creation */
int channels; /* channel count given at creation */
audio_effect_format_e format; /* sample format given at creation */
- size_t frames;
+ size_t support_framesize; /* frames handed to rnnoise per block; set to WORKING_FRAMESIZE */
float *fbuffer; /* float scratch for channels * support_framesize interleaved samples */
float *d_fbuffer[MAX_CHANNELS]; /* per-channel float scratch; only allocated when channels > 1 */
+
+ struct resampler_info_s *resampler_info; /* stays NULL when rate == WORKING_SAMPLERATE */
+ void (*process_func)(struct userdata *, short *, short *); /* mono or stereo worker, picked from the channel count */
};
+/*
+ * Number of frames that span `ms` milliseconds at sample rate `rate` (Hz).
+ *
+ * The result is rate * ms / 1000, truncated. Multiplying first keeps the
+ * value exact for durations that do not divide 1000 and avoids the division
+ * by zero that rate / (1000 / ms) hits once ms is 0 or larger than 1000.
+ *
+ * Returns 0 when either argument is not positive.
+ */
+static size_t get_framesize_ms(int rate, int ms)
+{
+    if (rate <= 0 || ms <= 0)
+        return 0;
+
+    /* Widen before multiplying so rate * ms cannot overflow int. */
+    return (size_t)(((long long)rate * ms) / 1000);
+}
+
/* Denoise one block of mono samples: copy `in` to the float scratch buffer,
 * run rnnoise with the channel-0 state, then copy the result to `out`. */
static void ns_rnnoise_process_mono(struct userdata *u, short *in, short *out)
{
short *ptr = in;
int i; /* NOTE(review): compared against a size_t bound below (signed/unsigned mix) */
- for (i = 0; i < u->frames; i++)
+ for (i = 0; i < u->support_framesize; i++)
u->fbuffer[i] = ptr[i]; /* short -> float */
rnnoise_process_frame(u->st[0], u->fbuffer, u->fbuffer); /* same buffer is passed as output and input */
ptr = out;
- for (i = 0; i < u->frames; i++)
+ for (i = 0; i < u->support_framesize; i++)
ptr[i] = u->fbuffer[i]; /* float -> short by implicit conversion; no explicit clamping */
}
assert(u->channels > 1);
- for (i = 0; i < u->frames * u->channels; i++)
+ for (i = 0; i < u->support_framesize * u->channels; i++)
u->fbuffer[i] = ptr[i];
- audio_effect_util_deinterleave(u->fbuffer, (void **)u->d_fbuffer, u->channels, float_sample_size, u->frames);
+ audio_effect_util_deinterleave(u->fbuffer, (void **)u->d_fbuffer, u->channels, float_sample_size, u->support_framesize);
for (i = 0; i < u->channels; i++)
rnnoise_process_frame(u->st[i], u->d_fbuffer[i], u->d_fbuffer[i]);
- audio_effect_util_interleave((const void **)u->d_fbuffer, u->fbuffer, u->channels, float_sample_size, u->frames);
+ audio_effect_util_interleave((const void **)u->d_fbuffer, u->fbuffer, u->channels, float_sample_size, u->support_framesize);
ptr = out;
- for (i = 0; i < u->frames * u->channels; i++)
+ for (i = 0; i < u->support_framesize * u->channels; i++)
ptr[i] = u->fbuffer[i];
}
+/*
+ * Denoise one block, converting to and from the working rate when this
+ * instance has resampler state.
+ *
+ * u   - effect instance; u->process_func must be set.
+ * in  - interleaved 16-bit input; from_frames frames are offered to the
+ *       resampler when one is in use.
+ * out - interleaved 16-bit output, written at the same rate as `in`.
+ *
+ * When u->resampler_info is NULL the buffers go straight to
+ * u->process_func. Otherwise the input is resampled into resample_buf,
+ * processed there in place, and resampled back into `out`.
+ *
+ * NOTE(review): the consumed/produced frame counts that Speex writes back
+ * into in_frames/out_frames are not inspected here.
+ */
+static void ns_rnnoise_process_auto(struct userdata *u, short *in, short *out)
+{
+    struct resampler_info_s *ri;
+    spx_uint32_t in_frames;
+    spx_uint32_t out_frames;
+    int err;
+
+    assert(u);
+    assert(in);
+    assert(out);
+    assert(u->process_func);
+
+    ri = u->resampler_info;
+    if (!ri) {
+        u->process_func(u, in, out);
+        return;
+    }
+
+    /*
+     * The resampler calls must stay outside assert(): with NDEBUG defined
+     * the argument of assert() is never evaluated, which would silently
+     * drop the resampling and leave `out` unwritten.
+     */
+    in_frames = (spx_uint32_t)ri->from_frames;
+    out_frames = (spx_uint32_t)ri->to_frames;
+    err = speex_resampler_process_interleaved_int(ri->resampler[RESAMPLER_TO_WORKING_RATE],
+                                                  (const spx_int16_t *)in, &in_frames,
+                                                  (spx_int16_t *)ri->resample_buf, &out_frames);
+    if (err != RESAMPLER_ERR_SUCCESS)
+        LOG_ERROR("Failed to resample to working rate. err(%d)", err);
+    assert(err == RESAMPLER_ERR_SUCCESS);
+
+    /* Input and output are the same scratch buffer. */
+    u->process_func(u, ri->resample_buf, ri->resample_buf);
+
+    in_frames = (spx_uint32_t)ri->to_frames;
+    out_frames = (spx_uint32_t)ri->from_frames;
+    err = speex_resampler_process_interleaved_int(ri->resampler[RESAMPLER_FROM_WORKING_RATE],
+                                                  (const spx_int16_t *)ri->resample_buf, &in_frames,
+                                                  (spx_int16_t *)out, &out_frames);
+    if (err != RESAMPLER_ERR_SUCCESS)
+        LOG_ERROR("Failed to resample from working rate. err(%d)", err);
+    assert(err == RESAMPLER_ERR_SUCCESS);
+}
+
+/*
+ * Release a resampler_info_s: every non-NULL Speex resampler it holds, its
+ * scratch buffer, and the structure itself. Passing NULL does nothing.
+ */
+static void ns_rnnoise_destroy_resampler_info(struct resampler_info_s *resampler_info)
+{
+    int idx = 0;
+
+    if (resampler_info == NULL)
+        return;
+
+    while (idx < RESAMPLER_MAX) {
+        if (resampler_info->resampler[idx] != NULL) {
+            speex_resampler_destroy(resampler_info->resampler[idx]);
+            resampler_info->resampler[idx] = NULL;
+        }
+        idx++;
+    }
+
+    /* free(NULL) is a no-op, so the buffer needs no guard. */
+    free(resampler_info->resample_buf);
+    free(resampler_info);
+}
+
+/*
+ * Create the resampler state needed to run a stream at `rate` through the
+ * WORKING_SAMPLERATE pipeline.
+ *
+ * resampler_info - out-parameter; must not be NULL.
+ * rate           - stream sample rate in Hz.
+ * channels       - channel count, passed to both Speex resamplers.
+ *
+ * Returns 0 on success and -1 on failure. *resampler_info receives the new
+ * object, or NULL when rate already equals WORKING_SAMPLERATE (nothing to
+ * convert) or when creation fails. Release the object with
+ * ns_rnnoise_destroy_resampler_info().
+ */
+static int ns_rnnoise_create_resampler_info(struct resampler_info_s **resampler_info, int rate, int channels)
+{
+    struct resampler_info_s *info;
+    SpeexResamplerState **resampler;
+
+    assert(resampler_info);
+
+    /* Give the out-parameter a defined value on every path, so callers do
+     * not have to pre-zero it before the pass-through or failure returns. */
+    *resampler_info = NULL;
+
+    if (rate == WORKING_SAMPLERATE)
+        return 0;
+
+    info = calloc(1, sizeof(*info));
+    if (!info) {
+        LOG_ERROR("Failed to allocate resampler info");
+        return -1;
+    }
+
+    resampler = info->resampler;
+
+    resampler[RESAMPLER_FROM_WORKING_RATE] = speex_resampler_init(channels, WORKING_SAMPLERATE, rate, RESAMPLER_QUALITY, NULL);
+    if (!resampler[RESAMPLER_FROM_WORKING_RATE]) {
+        LOG_ERROR("Failed to allocate resampler. rate(%d->%d)", WORKING_SAMPLERATE, rate);
+        goto fail;
+    }
+    speex_resampler_skip_zeros(resampler[RESAMPLER_FROM_WORKING_RATE]);
+
+    resampler[RESAMPLER_TO_WORKING_RATE] = speex_resampler_init(channels, rate, WORKING_SAMPLERATE, RESAMPLER_QUALITY, NULL);
+    if (!resampler[RESAMPLER_TO_WORKING_RATE]) {
+        LOG_ERROR("Failed to allocate resampler. rate(%d->%d)", rate, WORKING_SAMPLERATE);
+        goto fail;
+    }
+    speex_resampler_skip_zeros(resampler[RESAMPLER_TO_WORKING_RATE]);
+
+    /* One FIXED_FRAME_SIZE_MSEC block, counted at each of the two rates. */
+    info->from_frames = get_framesize_ms(rate, FIXED_FRAME_SIZE_MSEC);
+    info->to_frames = WORKING_FRAMESIZE;
+
+    /* The scratch buffer holds one working-rate block for all channels. */
+    info->resample_buf = malloc(info->to_frames * sizeof(*info->resample_buf) * channels);
+    if (!info->resample_buf) {
+        LOG_ERROR("Failed to allocate resampled buffer");
+        goto fail;
+    }
+
+    *resampler_info = info;
+
+    LOG_INFO("Created resampler. from_frames(%zu), to_frames(%zu)", info->from_frames, info->to_frames);
+
+    return 0;
+
+fail:
+    /* Frees whichever members were set; the rest are NULL from calloc(). */
+    ns_rnnoise_destroy_resampler_info(info);
+
+    return -1;
+}
+
static void *ns_rnnoise_create(int rate, int channels, audio_effect_format_e format, size_t frames)
{
struct userdata *u;
int i;
+ if (frames != get_framesize_ms(rate, FIXED_FRAME_SIZE_MSEC))
+ LOG_WARN("Not support rate(%d), frames(%zu).", rate, frames);
+
u = (struct userdata *)calloc(1, sizeof(struct userdata));
u->rate = rate;
u->channels = channels;
u->format = format;
+ u->support_framesize = WORKING_FRAMESIZE;
- if (frames != FIXED_FRAME_SIZE) {
- LOG_WARN("Not support frames(%zu). It will be set to fixed frame_size(%u)",
- frames, FIXED_FRAME_SIZE);
- }
-
- u->frames = FIXED_FRAME_SIZE;
-
- u->fbuffer = (float *)malloc(sizeof(float) * u->frames * channels);
+ u->fbuffer = (float *)malloc(sizeof(float) * channels * u->support_framesize);
if (!u->fbuffer) {
LOG_ERROR("Failed to allocate fbuffer");
goto fail;
/* it doesn't need d_fbuffer in case of mono */
if (u->channels > 1) {
for (i = 0; i < u->channels; i++) {
- u->d_fbuffer[i] = (float *)malloc(sizeof(float) * u->frames);
+ u->d_fbuffer[i] = (float *)malloc(sizeof(float) * u->support_framesize);
if (!u->d_fbuffer[i]) {
LOG_ERROR("Failed to allocate fbuffer");
goto fail;
}
}
- LOG_INFO("plugin rnnoise init. rate(%d), channels(%d), format(%d), frames(%zu)",
- rate, channels, format, u->frames);
+ if (ns_rnnoise_create_resampler_info(&u->resampler_info, rate, channels) < 0) {
+ LOG_ERROR("Failed to create resampler. rate(%d), channels(%d)", rate, channels);
+ goto fail;
+ }
+
+ u->process_func = u->channels == 1 ? ns_rnnoise_process_mono : ns_rnnoise_process_stereo;
+
+ LOG_INFO("plugin rnnoise init. rate(%d), channels(%d), format(%d)", rate, channels, format);
return u;
if (u->fbuffer)
free(u->fbuffer);
+ ns_rnnoise_destroy_resampler_info(u->resampler_info);
+
free(u);
return NULL;
assert(in);
assert(out);
- if (u->channels == 1)
- ns_rnnoise_process_mono(u, (short *)in, (short *)out);
- else
- ns_rnnoise_process_stereo(u, (short *)in, (short *)out);
+ ns_rnnoise_process_auto(u, (short *)in, (short *)out);
return 0;
}
u->fbuffer = NULL;
}
+ ns_rnnoise_destroy_resampler_info(u->resampler_info);
+
free(u);
u = NULL;
.destroy = ns_rnnoise_destroy,
},
.constraint = {
- .frames = FIXED_FRAME_SIZE,
- .min_rate = 48000,
+ .frames_msec = FIXED_FRAME_SIZE_MSEC,
+ .min_rate = 16000,
.max_rate = 48000,
.min_channels = 1,
.max_channels = MAX_CHANNELS,
#include "audio_effect.h"
#include "resources.h"
-#define MAX_CHANNELS 2
-#define FIXED_FRAME_SIZE 480
-#define FRAME_SIZE FIXED_FRAME_SIZE * MAX_CHANNELS
-
-#define OUTPUT_FILE_NAME "output_rnnoise.raw"
-#define OUTPUT_STEREO_FILE_NAME "output_rnnoise_2ch.raw"
+#define OUTPUT_441K_1CH_FILE_NAME "output_rnnoise_441k_1ch.raw"
+#define OUTPUT_441K_2CH_FILE_NAME "output_rnnoise_441k_2ch.raw"
+#define OUTPUT_48K_FILE_NAME "output_rnnoise_48k_1ch.raw"
int main(void)
{
audio_effect_s *ae;
FILE *fin;
FILE *fout;
- short in[FRAME_SIZE];
- short out[FRAME_SIZE];
- size_t framesize = FRAME_SIZE;
-
+ short *in;
+ short *out;
+ size_t framesize;
int i;
- int n = 0;
- int channels[MAX_CHANNELS] = { 1, 2 };
- char *test_file[] = { NOISE_48K_FILE_NAME, NOISE_48K_STEREO_FILE_NAME };
- char *out_file[] = { OUTPUT_FILE_NAME, OUTPUT_STEREO_FILE_NAME };
+ int channels[] = { 1, 2, 1 };
+ int rate[] = { 44100, 44100, 48000 };
+ int frame_size[] = { 441, 441, 480 };
+ char *test_file[] = { NOISE_441K_MONO_FILE_NAME, NOISE_441K_STEREO_FILE_NAME, NOISE_48K_FILE_NAME };
+ char *output_file[] = { OUTPUT_441K_1CH_FILE_NAME, OUTPUT_441K_2CH_FILE_NAME, OUTPUT_48K_FILE_NAME };
printf("--- rnnoise start ---\n");
- for (i = 0; i < MAX_CHANNELS; i++) {
+ for (i = 0; i < sizeof(channels)/sizeof(channels[0]); i++) {
fin = fopen(test_file[i], "r");
if (!fin) {
- printf("failed to find airport_48k.raw\n");
+ printf("failed to open. file(%s)\n", test_file[i]);
exit(-1);
}
- fout = fopen(out_file[i], "wb");
+ fout = fopen(output_file[i], "wb");
if (!fout) {
- printf("failed to open raw\n");
+ printf("failed to open file(%s)\n", output_file[i]);
+ exit(-1);
+ }
+
+ in = (short *)malloc(frame_size[i] * channels[i] * sizeof(short));
+ if (!in) {
+ printf("failed to allocate in\n");
exit(-1);
}
- ae = audio_effect_create(AUDIO_EFFECT_METHOD_NS_RNNOISE, 48000, channels[i], AUDIO_EFFECT_FORMAT_S16, framesize);
+ out = (short *)malloc(frame_size[i]* channels[i] * sizeof(short));
+ if (!out) {
+ printf("failed to allocate out\n");
+ exit(-1);
+ }
+
+ ae = audio_effect_create(AUDIO_EFFECT_METHOD_NS_RNNOISE, rate[i], channels[i], AUDIO_EFFECT_FORMAT_S16, frame_size[i]);
assert(ae);
framesize = audio_effect_get_process_framesize(ae);
- assert(framesize == FIXED_FRAME_SIZE);
printf("frame size %zu\n", framesize);
while (!feof(fin)) {
- if (fread(in, sizeof(short) * channels[i], framesize, fin) < 0)
+ if (fread(in, sizeof(short) * channels[i] * framesize, 1, fin) < 0)
break;
- printf("#%d frame. ", n++);
-
if (audio_effect_process(ae, in, out) < 0) {
printf("(failed!)\n");
} else {
- printf("(success!)\n");
- fwrite(out, sizeof(short) * channels[i], framesize, fout);
+ fwrite(out, sizeof(short) * channels[i] * framesize, 1, fout);
}
}
audio_effect_destroy(ae);
+ free(in);
+ free(out);
+
fclose(fin);
fclose(fout);
}