Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] src/third_party/webrtc/modules/audio_processing/test/process_test.cc
index 8a5d0f5..05f4b77 100644
@@ -19,6 +19,7 @@
 
 #include "webrtc/common.h"
 #include "webrtc/modules/audio_processing/include/audio_processing.h"
+#include "webrtc/modules/audio_processing/test/test_utils.h"
 #include "webrtc/modules/interface/module_common_types.h"
 #include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
 #include "webrtc/system_wrappers/interface/scoped_ptr.h"
 #include "webrtc/audio_processing/debug.pb.h"
 #endif
 
-using webrtc::AudioFrame;
-using webrtc::AudioProcessing;
-using webrtc::Config;
-using webrtc::DelayCorrection;
-using webrtc::EchoCancellation;
-using webrtc::GainControl;
-using webrtc::NoiseSuppression;
-using webrtc::scoped_array;
-using webrtc::scoped_ptr;
-using webrtc::TickInterval;
-using webrtc::TickTime;
-using webrtc::VoiceDetection;
+namespace webrtc {
 
 using webrtc::audioproc::Event;
 using webrtc::audioproc::Init;
@@ -52,28 +42,6 @@ using webrtc::audioproc::ReverseStream;
 using webrtc::audioproc::Stream;
 
 namespace {
-// Returns true on success, false on error or end-of-file.
-bool ReadMessageFromFile(FILE* file,
-                        ::google::protobuf::MessageLite* msg) {
-  // The "wire format" for the size is little-endian.
-  // Assume process_test is running on a little-endian machine.
-  int32_t size = 0;
-  if (fread(&size, sizeof(int32_t), 1, file) != 1) {
-    return false;
-  }
-  if (size <= 0) {
-    return false;
-  }
-  const size_t usize = static_cast<size_t>(size);
-
-  scoped_array<char> array(new char[usize]);
-  if (fread(array.get(), sizeof(char), usize, file) != usize) {
-    return false;
-  }
-
-  msg->Clear();
-  return msg->ParseFromArray(array.get(), usize);
-}
 
 void PrintStat(const AudioProcessing::Statistic& stat) {
   printf("%d, %d, %d\n", stat.average,
@@ -87,11 +55,11 @@ void usage() {
   "  [-ir REVERSE_FILE] [-i PRIMARY_FILE] [-o OUT_FILE]\n");
   printf(
   "process_test is a test application for AudioProcessing.\n\n"
-  "When a protobuf debug file is available, specify it with -pb.\n"
-  "Alternately, when -ir or -i is used, the specified files will be\n"
-  "processed directly in a simulation mode. Otherwise the full set of\n"
-  "legacy test files is expected to be present in the working directory.\n");
-  printf("\n");
+  "When a protobuf debug file is available, specify it with -pb. Alternately,\n"
+  "when -ir or -i is used, the specified files will be processed directly in\n"
+  "a simulation mode. Otherwise the full set of legacy test files is expected\n"
+  "to be present in the working directory. OUT_FILE should be specified\n"
+  "without extension to support both int and float output.\n\n");
   printf("Options\n");
   printf("General configuration (only used for the simulation mode):\n");
   printf("  -fs SAMPLE_RATE_HZ\n");
@@ -110,6 +78,7 @@ void usage() {
   printf("  --no_delay_logging\n");
   printf("  --aec_suppression_level LEVEL  [0 - 2]\n");
   printf("  --extended_filter\n");
+  printf("  --no_reported_delay\n");
   printf("\n  -aecm    Echo control mobile\n");
   printf("  --aecm_echo_path_in_file FILE\n");
   printf("  --aecm_echo_path_out_file FILE\n");
@@ -132,6 +101,7 @@ void usage() {
   printf("  --ns_prob_file FILE\n");
   printf("\n  -vad     Voice activity detection\n");
   printf("  --vad_out_file FILE\n");
+  printf("\n  -expns   Experimental noise suppression\n");
   printf("\n Level metrics (enabled by default)\n");
   printf("  --no_level_metrics\n");
   printf("\n");
@@ -173,20 +143,19 @@ void void_main(int argc, char* argv[]) {
     printf("Try `process_test --help' for more information.\n\n");
   }
 
-  scoped_ptr<AudioProcessing> apm(AudioProcessing::Create(0));
+  scoped_ptr<AudioProcessing> apm(AudioProcessing::Create());
   ASSERT_TRUE(apm.get() != NULL);
 
   const char* pb_filename = NULL;
   const char* far_filename = NULL;
   const char* near_filename = NULL;
-  const char* out_filename = NULL;
+  std::string out_filename;
   const char* vad_out_filename = NULL;
   const char* ns_prob_filename = NULL;
   const char* aecm_echo_path_in_filename = NULL;
   const char* aecm_echo_path_out_filename = NULL;
 
   int32_t sample_rate_hz = 16000;
-  int32_t device_sample_rate_hz = 16000;
 
   int num_capture_input_channels = 1;
   int num_capture_output_channels = 1;
@@ -200,7 +169,6 @@ void void_main(int argc, char* argv[]) {
   bool progress = true;
   int extra_delay_ms = 0;
   int override_delay_ms = 0;
-  //bool interleaved = true;
 
   ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(true));
   for (int i = 1; i < argc; i++) {
@@ -223,7 +191,7 @@ void void_main(int argc, char* argv[]) {
 
     } else if (strcmp(argv[i], "-o") == 0) {
       i++;
-      ASSERT_LT(i, argc) << "Specify filename after -o";
+      ASSERT_LT(i, argc) << "Specify filename without extension after -o";
       out_filename = argv[i];
 
     } else if (strcmp(argv[i], "-fs") == 0) {
@@ -290,6 +258,11 @@ void void_main(int argc, char* argv[]) {
       config.Set<DelayCorrection>(new DelayCorrection(true));
       apm->SetExtraOptions(config);
 
+    } else if (strcmp(argv[i], "--no_reported_delay") == 0) {
+      Config config;
+      config.Set<ReportedDelay>(new ReportedDelay(false));
+      apm->SetExtraOptions(config);
+
     } else if (strcmp(argv[i], "-aecm") == 0) {
       ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true));
 
@@ -426,6 +399,11 @@ void void_main(int argc, char* argv[]) {
       ASSERT_LT(i, argc) << "Specify filename after --vad_out_file";
       vad_out_filename = argv[i];
 
+    } else if (strcmp(argv[i], "-expns") == 0) {
+      Config config;
+      config.Set<ExperimentalNs>(new ExperimentalNs(true));
+      apm->SetExtraOptions(config);
+
     } else if (strcmp(argv[i], "--noasm") == 0) {
       WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM;
       // We need to reinitialize here if components have already been enabled.
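Note: the extra options exercised in the two new branches above (ReportedDelay, ExperimentalNs) use the same Config mechanism as the existing --extended_filter handling (DelayCorrection). A minimal sketch, not part of the patch, of how such options can be combined into a single SetExtraOptions() call; the particular combination is illustrative only:

    // Illustrative only: several extra options bundled in one Config.
    Config config;
    config.Set<DelayCorrection>(new DelayCorrection(true));  // --extended_filter
    config.Set<ReportedDelay>(new ReportedDelay(false));     // --no_reported_delay
    config.Set<ExperimentalNs>(new ExperimentalNs(true));    // -expns
    apm->SetExtraOptions(config);
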
@@ -472,7 +450,6 @@ void void_main(int argc, char* argv[]) {
   const std::string out_path = webrtc::test::OutputPath();
   const char far_file_default[] = "apm_far.pcm";
   const char near_file_default[] = "apm_near.pcm";
-  const std::string out_file_default = out_path + "out.pcm";
   const char event_filename[] = "apm_event.dat";
   const char delay_filename[] = "apm_delay.dat";
   const char drift_filename[] = "apm_drift.dat";
@@ -484,9 +461,11 @@ void void_main(int argc, char* argv[]) {
     near_filename = near_file_default;
   }
 
-  if (!out_filename) {
-    out_filename = out_file_default.c_str();
+  if (out_filename.size() == 0) {
+    out_filename = out_path + "out";
   }
+  std::string out_float_filename = out_filename + ".float";
+  out_filename += ".pcm";
 
   if (!vad_out_filename) {
     vad_out_filename = vad_file_default.c_str();
@@ -499,7 +478,6 @@ void void_main(int argc, char* argv[]) {
   FILE* pb_file = NULL;
   FILE* far_file = NULL;
   FILE* near_file = NULL;
-  FILE* out_file = NULL;
   FILE* event_file = NULL;
   FILE* delay_file = NULL;
   FILE* drift_file = NULL;
@@ -509,38 +487,20 @@ void void_main(int argc, char* argv[]) {
   FILE* aecm_echo_path_out_file = NULL;
 
   if (pb_filename) {
-    pb_file = fopen(pb_filename, "rb");
-    ASSERT_TRUE(NULL != pb_file) << "Unable to open protobuf file "
-                                 << pb_filename;
+    pb_file = OpenFile(pb_filename, "rb");
   } else {
     if (far_filename) {
-      far_file = fopen(far_filename, "rb");
-      ASSERT_TRUE(NULL != far_file) << "Unable to open far-end audio file "
-                                    << far_filename;
+      far_file = OpenFile(far_filename, "rb");
     }
 
-    near_file = fopen(near_filename, "rb");
-    ASSERT_TRUE(NULL != near_file) << "Unable to open near-end audio file "
-                                   << near_filename;
+    near_file = OpenFile(near_filename, "rb");
     if (!simulating) {
-      event_file = fopen(event_filename, "rb");
-      ASSERT_TRUE(NULL != event_file) << "Unable to open event file "
-                                      << event_filename;
-
-      delay_file = fopen(delay_filename, "rb");
-      ASSERT_TRUE(NULL != delay_file) << "Unable to open buffer file "
-                                      << delay_filename;
-
-      drift_file = fopen(drift_filename, "rb");
-      ASSERT_TRUE(NULL != drift_file) << "Unable to open drift file "
-                                      << drift_filename;
+      event_file = OpenFile(event_filename, "rb");
+      delay_file = OpenFile(delay_filename, "rb");
+      drift_file = OpenFile(drift_filename, "rb");
     }
   }
 
-  out_file = fopen(out_filename, "wb");
-  ASSERT_TRUE(NULL != out_file) << "Unable to open output audio file "
-                                << out_filename;
-
   int near_size_bytes = 0;
   if (pb_file) {
     struct stat st;
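Note: the repeated fopen()/ASSERT_TRUE pairs above are replaced by the OpenFile() helper pulled in from test_utils.h (see the new include at the top of this patch). A hypothetical sketch of such a helper, assuming it simply wraps fopen() and aborts the test on failure; the real implementation in test_utils.h may differ:

    // Hypothetical sketch of an fopen() wrapper; the actual OpenFile() in
    // test_utils.h may differ in details.
    FILE* OpenFile(const std::string& filename, const char* mode) {
      FILE* file = fopen(filename.c_str(), mode);
      if (file == NULL) {
        printf("Unable to open file %s\n", filename.c_str());
        exit(1);
      }
      return file;
    }
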
@@ -554,25 +514,19 @@ void void_main(int argc, char* argv[]) {
   }
 
   if (apm->voice_detection()->is_enabled()) {
-    vad_out_file = fopen(vad_out_filename, "wb");
-    ASSERT_TRUE(NULL != vad_out_file) << "Unable to open VAD output file "
-                                      << vad_out_file;
+    vad_out_file = OpenFile(vad_out_filename, "wb");
   }
 
   if (apm->noise_suppression()->is_enabled()) {
-    ns_prob_file = fopen(ns_prob_filename, "wb");
-    ASSERT_TRUE(NULL != ns_prob_file) << "Unable to open NS output file "
-                                      << ns_prob_file;
+    ns_prob_file = OpenFile(ns_prob_filename, "wb");
   }
 
   if (aecm_echo_path_in_filename != NULL) {
-    aecm_echo_path_in_file = fopen(aecm_echo_path_in_filename, "rb");
-    ASSERT_TRUE(NULL != aecm_echo_path_in_file) << "Unable to open file "
-                                                << aecm_echo_path_in_filename;
+    aecm_echo_path_in_file = OpenFile(aecm_echo_path_in_filename, "rb");
 
     const size_t path_size =
         apm->echo_control_mobile()->echo_path_size_bytes();
-    scoped_array<char> echo_path(new char[path_size]);
+    scoped_ptr<char[]> echo_path(new char[path_size]);
     ASSERT_EQ(path_size, fread(echo_path.get(),
                                sizeof(char),
                                path_size,
@@ -585,9 +539,7 @@ void void_main(int argc, char* argv[]) {
   }
 
   if (aecm_echo_path_out_filename != NULL) {
-    aecm_echo_path_out_file = fopen(aecm_echo_path_out_filename, "wb");
-    ASSERT_TRUE(NULL != aecm_echo_path_out_file) << "Unable to open file "
-                                                 << aecm_echo_path_out_filename;
+    aecm_echo_path_out_file = OpenFile(aecm_echo_path_out_filename, "wb");
   }
 
   size_t read_count = 0;
@@ -616,6 +568,10 @@ void void_main(int argc, char* argv[]) {
   //            but for now we want to share the variables.
   if (pb_file) {
     Event event_msg;
+    scoped_ptr<ChannelBuffer<float> > reverse_cb;
+    scoped_ptr<ChannelBuffer<float> > primary_cb;
+    int output_sample_rate = 32000;
+    AudioProcessing::ChannelLayout output_layout = AudioProcessing::kMono;
     while (ReadMessageFromFile(pb_file, &event_msg)) {
       std::ostringstream trace_stream;
       trace_stream << "Processed frames: " << reverse_count << " (reverse), "
@@ -627,17 +583,25 @@ void void_main(int argc, char* argv[]) {
         const Init msg = event_msg.init();
 
         ASSERT_TRUE(msg.has_sample_rate());
-        // TODO(bjornv): Replace set_sample_rate_hz() when we have a smarter
-        // AnalyzeReverseStream().
-        ASSERT_EQ(apm->kNoError, apm->set_sample_rate_hz(msg.sample_rate()));
-        ASSERT_TRUE(msg.has_device_sample_rate());
-        ASSERT_EQ(apm->kNoError,
-                  apm->echo_cancellation()->set_device_sample_rate_hz(
-                      msg.device_sample_rate()));
-
         ASSERT_TRUE(msg.has_num_input_channels());
         ASSERT_TRUE(msg.has_num_output_channels());
         ASSERT_TRUE(msg.has_num_reverse_channels());
+        int reverse_sample_rate = msg.sample_rate();
+        if (msg.has_reverse_sample_rate()) {
+          reverse_sample_rate = msg.reverse_sample_rate();
+        }
+        output_sample_rate = msg.sample_rate();
+        if (msg.has_output_sample_rate()) {
+          output_sample_rate = msg.output_sample_rate();
+        }
+        output_layout = LayoutFromChannels(msg.num_output_channels());
+        ASSERT_EQ(kNoErr, apm->Initialize(
+                              msg.sample_rate(),
+                              output_sample_rate,
+                              reverse_sample_rate,
+                              LayoutFromChannels(msg.num_input_channels()),
+                              output_layout,
+                              LayoutFromChannels(msg.num_reverse_channels())));
 
         samples_per_channel = msg.sample_rate() / 100;
         far_frame.sample_rate_hz_ = msg.sample_rate();
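Note: the old set_sample_rate_hz()/set_device_sample_rate_hz() calls are replaced above by a single Initialize() overload that takes separate input, output, and reverse-stream rates plus ChannelLayout values. A minimal sketch of that call with purely illustrative parameters (the test instead derives them from the aecdump Init event):

    // Illustrative only: 48 kHz stereo capture, 16 kHz mono output and
    // reverse (render) stream.
    ASSERT_EQ(apm->kNoError,
              apm->Initialize(48000,                     // capture input rate
                              16000,                     // capture output rate
                              16000,                     // reverse stream rate
                              AudioProcessing::kStereo,  // input layout
                              AudioProcessing::kMono,    // output layout
                              AudioProcessing::kMono));  // reverse layout
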
@@ -646,33 +610,54 @@ void void_main(int argc, char* argv[]) {
         near_frame.sample_rate_hz_ = msg.sample_rate();
         near_frame.samples_per_channel_ = samples_per_channel;
         near_frame.num_channels_ = msg.num_input_channels();
+        reverse_cb.reset(new ChannelBuffer<float>(samples_per_channel,
+                                                  msg.num_reverse_channels()));
+        primary_cb.reset(new ChannelBuffer<float>(samples_per_channel,
+                                                  msg.num_input_channels()));
 
         if (verbose) {
           printf("Init at frame: %d (primary), %d (reverse)\n",
               primary_count, reverse_count);
-          printf("  Sample rate: %d Hz\n", msg.sample_rate());
+          printf("  Primary rates: %d Hz (in), %d Hz (out)\n",
+                 msg.sample_rate(), output_sample_rate);
           printf("  Primary channels: %d (in), %d (out)\n",
                  msg.num_input_channels(),
                  msg.num_output_channels());
-          printf("  Reverse channels: %d \n", msg.num_reverse_channels());
+          printf("  Reverse rate: %d\n", reverse_sample_rate);
+          printf("  Reverse channels: %d\n", msg.num_reverse_channels());
         }
 
       } else if (event_msg.type() == Event::REVERSE_STREAM) {
         ASSERT_TRUE(event_msg.has_reverse_stream());
-        const ReverseStream msg = event_msg.reverse_stream();
+        ReverseStream msg = event_msg.reverse_stream();
         reverse_count++;
 
-        ASSERT_TRUE(msg.has_data());
-        ASSERT_EQ(sizeof(int16_t) * samples_per_channel *
-            far_frame.num_channels_, msg.data().size());
-        memcpy(far_frame.data_, msg.data().data(), msg.data().size());
+        ASSERT_TRUE(msg.has_data() ^ (msg.channel_size() > 0));
+        if (msg.has_data()) {
+          ASSERT_EQ(sizeof(int16_t) * samples_per_channel *
+              far_frame.num_channels_, msg.data().size());
+          memcpy(far_frame.data_, msg.data().data(), msg.data().size());
+        } else {
+          for (int i = 0; i < msg.channel_size(); ++i) {
+            reverse_cb->CopyFrom(msg.channel(i).data(), i);
+          }
+        }
 
         if (perf_testing) {
           t0 = TickTime::Now();
         }
 
-        ASSERT_EQ(apm->kNoError,
-                  apm->AnalyzeReverseStream(&far_frame));
+        if (msg.has_data()) {
+          ASSERT_EQ(apm->kNoError,
+                    apm->AnalyzeReverseStream(&far_frame));
+        } else {
+          ASSERT_EQ(apm->kNoError,
+                    apm->AnalyzeReverseStream(
+                        reverse_cb->channels(),
+                        far_frame.samples_per_channel_,
+                        far_frame.sample_rate_hz_,
+                        LayoutFromChannels(far_frame.num_channels_)));
+        }
 
         if (perf_testing) {
           t1 = TickTime::Now();
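Note: REVERSE_STREAM events may now carry either the legacy interleaved int16 payload (msg.data()) or one float payload per channel (msg.channel(i)); the float case is copied into a ChannelBuffer<float> and fed to the float overload of AnalyzeReverseStream(), as shown above. A minimal self-contained sketch of that float path, with illustrative sizes and silent input:

    // Illustrative only: one 10 ms block of deinterleaved float render audio.
    const int kRate = 16000;           // assumed sample rate
    const int kChannels = 2;           // assumed channel count
    const int kSamples = kRate / 100;  // 10 ms per channel
    ChannelBuffer<float> render(kSamples, kChannels);
    float block[kSamples] = {0.f};     // silence, stands in for real samples
    for (int ch = 0; ch < kChannels; ++ch) {
      render.CopyFrom(block, ch);      // CopyFrom() takes one channel of samples
    }
    ASSERT_EQ(apm->kNoError,
              apm->AnalyzeReverseStream(render.channels(),
                                        kSamples,
                                        kRate,
                                        LayoutFromChannels(kChannels)));
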
@@ -694,12 +679,18 @@ void void_main(int argc, char* argv[]) {
         // ProcessStream could have changed this for the output frame.
         near_frame.num_channels_ = apm->num_input_channels();
 
-        ASSERT_TRUE(msg.has_input_data());
-        ASSERT_EQ(sizeof(int16_t) * samples_per_channel *
-            near_frame.num_channels_, msg.input_data().size());
-        memcpy(near_frame.data_,
-               msg.input_data().data(),
-               msg.input_data().size());
+        ASSERT_TRUE(msg.has_input_data() ^ (msg.input_channel_size() > 0));
+        if (msg.has_input_data()) {
+          ASSERT_EQ(sizeof(int16_t) * samples_per_channel *
+              near_frame.num_channels_, msg.input_data().size());
+          memcpy(near_frame.data_,
+                 msg.input_data().data(),
+                 msg.input_data().size());
+        } else {
+          for (int i = 0; i < msg.input_channel_size(); ++i) {
+            primary_cb->CopyFrom(msg.input_channel(i).data(), i);
+          }
+        }
 
         near_read_bytes += msg.input_data().size();
         if (progress && primary_count % 100 == 0) {
@@ -722,13 +713,32 @@ void void_main(int argc, char* argv[]) {
                   apm->set_stream_delay_ms(delay_ms));
         apm->echo_cancellation()->set_stream_drift_samples(msg.drift());
 
-        int err = apm->ProcessStream(&near_frame);
+        if (msg.has_keypress()) {
+          apm->set_stream_key_pressed(msg.keypress());
+        } else {
+          apm->set_stream_key_pressed(true);
+        }
+
+        int err = apm->kNoError;
+        if (msg.has_input_data()) {
+          err = apm->ProcessStream(&near_frame);
+          ASSERT_TRUE(near_frame.num_channels_ == apm->num_output_channels());
+        } else {
+          err = apm->ProcessStream(
+              primary_cb->channels(),
+              near_frame.samples_per_channel_,
+              near_frame.sample_rate_hz_,
+              LayoutFromChannels(near_frame.num_channels_),
+              output_sample_rate,
+              output_layout,
+              primary_cb->channels());
+        }
+
         if (err == apm->kBadStreamParameterWarning) {
           printf("Bad parameter warning. %s\n", trace_stream.str().c_str());
         }
         ASSERT_TRUE(err == apm->kNoError ||
                     err == apm->kBadStreamParameterWarning);
-        ASSERT_TRUE(near_frame.num_channels_ == apm->num_output_channels());
 
         stream_has_voice =
             static_cast<int8_t>(apm->voice_detection()->stream_has_voice());
@@ -759,11 +769,20 @@ void void_main(int argc, char* argv[]) {
           }
         }
 
-        size_t size = samples_per_channel * near_frame.num_channels_;
-        ASSERT_EQ(size, fwrite(near_frame.data_,
-                               sizeof(int16_t),
-                               size,
-                               out_file));
+        size_t num_samples = samples_per_channel * apm->num_output_channels();
+        if (msg.has_input_data()) {
+          static FILE* out_file = OpenFile(out_filename, "wb");
+          ASSERT_EQ(num_samples, fwrite(near_frame.data_,
+                                        sizeof(*near_frame.data_),
+                                        num_samples,
+                                        out_file));
+        } else {
+          static FILE* out_float_file = OpenFile(out_float_filename, "wb");
+          ASSERT_EQ(num_samples, fwrite(primary_cb->data(),
+                                        sizeof(*primary_cb->data()),
+                                        num_samples,
+                                        out_float_file));
+        }
       }
     }
 
@@ -811,19 +830,20 @@ void void_main(int argc, char* argv[]) {
             fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file));
         samples_per_channel = sample_rate_hz / 100;
 
+        int32_t unused_device_sample_rate_hz;
         ASSERT_EQ(1u,
-            fread(&device_sample_rate_hz,
-                  sizeof(device_sample_rate_hz),
+            fread(&unused_device_sample_rate_hz,
+                  sizeof(unused_device_sample_rate_hz),
                   1,
                   event_file));
 
-        // TODO(bjornv): Replace set_sample_rate_hz() when we have a smarter
-        // AnalyzeReverseStream().
-        ASSERT_EQ(apm->kNoError, apm->set_sample_rate_hz(sample_rate_hz));
-
-        ASSERT_EQ(apm->kNoError,
-                  apm->echo_cancellation()->set_device_sample_rate_hz(
-                      device_sample_rate_hz));
+        ASSERT_EQ(kNoErr, apm->Initialize(
+                              sample_rate_hz,
+                              sample_rate_hz,
+                              sample_rate_hz,
+                              LayoutFromChannels(num_capture_input_channels),
+                              LayoutFromChannels(num_capture_output_channels),
+                              LayoutFromChannels(num_render_channels)));
 
         far_frame.sample_rate_hz_ = sample_rate_hz;
         far_frame.samples_per_channel_ = samples_per_channel;
@@ -930,6 +950,8 @@ void void_main(int argc, char* argv[]) {
                   apm->set_stream_delay_ms(delay_ms));
         apm->echo_cancellation()->set_stream_drift_samples(drift_samples);
 
+        apm->set_stream_key_pressed(true);
+
         int err = apm->ProcessStream(&near_frame);
         if (err == apm->kBadStreamParameterWarning) {
           printf("Bad parameter warning. %s\n", trace_stream.str().c_str());
@@ -974,6 +996,7 @@ void void_main(int argc, char* argv[]) {
         }
 
         size = samples_per_channel * near_frame.num_channels_;
+        static FILE* out_file = OpenFile(out_filename, "wb");
         ASSERT_EQ(size, fwrite(near_frame.data_,
                                sizeof(int16_t),
                                size,
@@ -989,7 +1012,7 @@ void void_main(int argc, char* argv[]) {
   if (aecm_echo_path_out_file != NULL) {
     const size_t path_size =
         apm->echo_control_mobile()->echo_path_size_bytes();
-    scoped_array<char> echo_path(new char[path_size]);
+    scoped_ptr<char[]> echo_path(new char[path_size]);
     apm->echo_control_mobile()->GetEchoPath(echo_path.get(), path_size);
     ASSERT_EQ(path_size, fwrite(echo_path.get(),
                                 sizeof(char),
@@ -1067,11 +1090,13 @@ void void_main(int argc, char* argv[]) {
     }
   }
 }
+
 }  // namespace
+}  // namespace webrtc
 
 int main(int argc, char* argv[])
 {
-  void_main(argc, argv);
+  webrtc::void_main(argc, argv);
 
   // Optional, but removes memory leak noise from Valgrind.
   google::protobuf::ShutdownProtobufLibrary();