Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / content / browser / speech / speech_recognizer_impl.h
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
7
8 #include "base/basictypes.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "content/browser/speech/endpointer/endpointer.h"
11 #include "content/browser/speech/speech_recognition_engine.h"
12 #include "content/browser/speech/speech_recognizer.h"
13 #include "content/public/common/speech_recognition_error.h"
14 #include "content/public/common/speech_recognition_result.h"
15 #include "media/audio/audio_input_controller.h"
16 #include "media/audio/audio_logging.h"
17 #include "net/url_request/url_request_context_getter.h"
18
19 namespace media {
   // Forward declarations: this header only refers to these types through
   // pointers (SetAudioManagerForTesting(), OnData(), audio_manager_for_tests_),
   // so their complete definitions are not needed here.
20 class AudioBus;
21 class AudioManager;
22 }  // namespace media
23
24 namespace content {
25
26 class SpeechRecognitionEventListener;  // Only used via pointer below.
27
28 // Handles speech recognition for a session (identified by |session_id|), taking
29 // care of audio capture, silence detection/endpointer and interaction with the
30 // SpeechRecognitionEngine.
   //
   // The class is organized as a finite state machine (see FSMState and
   // FSMEvent below): external events are pushed in through DispatchEvent() and
   // ExecuteTransitionAndGetNextState() selects the transition for the current
   // state and event. Only declarations live in this header, apart from the
   // three empty inline AudioInputController::EventHandler callbacks.
   //
   // The destructor is private, so outside code cannot delete an instance
   // directly. NOTE(review): lifetime is presumably managed through the
   // SpeechRecognizer base class (e.g. reference counting) -- confirm there.
31 class CONTENT_EXPORT SpeechRecognizerImpl
32     : public SpeechRecognizer,
33       public media::AudioInputController::EventHandler,
34       public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {
35  public:
     // Audio format and timing constants. They are only declared here; their
     // values are defined outside this header. NOTE(review): the "Ms" suffix
     // presumably means milliseconds -- confirm at the definitions.
36   static const int kAudioSampleRate;
37   static const media::ChannelLayout kChannelLayout;
38   static const int kNumBitsPerAudioSample;
39   static const int kNoSpeechTimeoutMs;
40   static const int kEndpointerEstimationTimeMs;
41
     // Test-only hook. NOTE(review): presumably stores |audio_manager| in the
     // static audio_manager_for_tests_ member so that tests can substitute the
     // audio manager -- confirm in the implementation file.
42   static void SetAudioManagerForTesting(media::AudioManager* audio_manager);
43
     // |session_id| identifies the session handled by this recognizer (see the
     // class comment). NOTE(review): the remaining semantics are not visible in
     // this header and should be confirmed against the definition:
     //   - |listener| is presumably the sink for recognition events;
     //   - |continuous| and |provisional_results| presumably select continuous
     //     recognition and delivery of interim results;
     //   - |engine| is presumably owned by this object afterwards, since
     //     recognition_engine_ is a scoped_ptr.
44   SpeechRecognizerImpl(SpeechRecognitionEventListener* listener,
45                        int session_id,
46                        bool continuous,
47                        bool provisional_results,
48                        SpeechRecognitionEngine* engine);
49
     // Overrides of inherited virtual methods. NOTE(review): presumably the
     // SpeechRecognizer interface, since the overrides for the other two base
     // classes are grouped in the private section -- confirm.
50   virtual void StartRecognition(const std::string& device_id) OVERRIDE;
51   virtual void AbortRecognition() OVERRIDE;
52   virtual void StopAudioCapture() OVERRIDE;
53   virtual bool IsActive() const OVERRIDE;
54   virtual bool IsCapturingAudio() const OVERRIDE;
     // Read-only access to the recognition engine. NOTE(review): presumably
     // dereferences recognition_engine_ -- confirm.
55   const SpeechRecognitionEngine& recognition_engine() const;
56
57  private:
     // Gives the test fixture access to the private FSM types and members.
58   friend class SpeechRecognizerTest;
59
     // States of the recognizer FSM. STATE_MAX_VALUE aliases the last state
     // (STATE_ENDED); NOTE(review): presumably used for range checks -- confirm.
60   enum FSMState {
61     STATE_IDLE = 0,
62     STATE_STARTING,
63     STATE_ESTIMATING_ENVIRONMENT,
64     STATE_WAITING_FOR_SPEECH,
65     STATE_RECOGNIZING,
66     STATE_WAITING_FINAL_RESULT,
67     STATE_ENDED,
68     STATE_MAX_VALUE = STATE_ENDED
69   };
70
     // Events that can be fed to the recognizer FSM. EVENT_MAX_VALUE aliases the
     // last event (EVENT_AUDIO_ERROR).
71   enum FSMEvent {
72     EVENT_ABORT = 0,
73     EVENT_START,
74     EVENT_STOP_CAPTURE,
75     EVENT_AUDIO_DATA,
76     EVENT_ENGINE_RESULT,
77     EVENT_ENGINE_ERROR,
78     EVENT_AUDIO_ERROR,
79     EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
80   };
81
     // Bundles an FSM event with the payload fields that may accompany it.
     // NOTE(review): presumably |audio_data| is meaningful for EVENT_AUDIO_DATA,
     // |engine_results| for EVENT_ENGINE_RESULT and |engine_error| for the error
     // events -- confirm against the code that fills these in.
82   struct FSMEventArgs {
83     explicit FSMEventArgs(FSMEvent event_value);
84     ~FSMEventArgs();
85
86     FSMEvent event;
87     scoped_refptr<AudioChunk> audio_data;
88     SpeechRecognitionResults engine_results;
89     SpeechRecognitionError engine_error;
90   };
91
     // Private: see the lifetime note in the class comment.
92   virtual ~SpeechRecognizerImpl();
93
94   // Entry point for pushing any new external event into the recognizer FSM.
95   void DispatchEvent(const FSMEventArgs& event_args);
96
97   // Defines the behavior of the recognizer FSM, selecting the appropriate
98   // transition according to the current state and event.
99   FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
100
101   // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
102   void ProcessAudioPipeline(const AudioChunk& raw_audio);
103
104   // The methods below handle transitions of the recognizer FSM.
      // Each returns an FSMState (NOTE(review): presumably the state the FSM
      // moves to next -- confirm). All take the triggering event's arguments
      // except Abort(), which takes the error to report directly. DoNothing()
      // is the only handler declared const.
105   FSMState StartRecording(const FSMEventArgs& event_args);
106   FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
107   FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
108   FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
109   FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
110   FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
111   FSMState ProcessFinalResult(const FSMEventArgs& event_args);
112   FSMState AbortSilently(const FSMEventArgs& event_args);
113   FSMState AbortWithError(const FSMEventArgs& event_args);
114   FSMState Abort(const SpeechRecognitionError& error);
115   FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
116   FSMState DoNothing(const FSMEventArgs& event_args) const;
117   FSMState NotFeasible(const FSMEventArgs& event_args);
118
119   // Returns the time span of captured audio samples since the start of capture.
120   int GetElapsedTimeMs() const;
121
122   // Calculates the input volume to be displayed in the UI, triggering the
123   // OnAudioLevelsChange event accordingly.
124   void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
125
      // NOTE(review): presumably closes audio_controller_ and has OnAudioClosed()
      // invoked on completion (see the comment on OnAudioClosed) -- confirm.
126   void CloseAudioControllerAsynchronously();
127
128   // Callback called on IO thread by audio_controller->Close().
129   void OnAudioClosed(media::AudioInputController*);
130
131   // AudioInputController::EventHandler methods.
      // OnCreated(), OnRecording() and OnLog() have empty inline bodies here;
      // OnError() and OnData() are defined outside this header.
132   virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
133   virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
134   virtual void OnError(media::AudioInputController* controller,
135       media::AudioInputController::ErrorCode error_code) OVERRIDE;
136   virtual void OnData(media::AudioInputController* controller,
137                       const media::AudioBus* data) OVERRIDE;
138   virtual void OnLog(media::AudioInputController* controller,
139                      const std::string& message) OVERRIDE {}
140
141   // SpeechRecognitionEngineDelegate methods.
142   virtual void OnSpeechRecognitionEngineResults(
143       const SpeechRecognitionResults& results) OVERRIDE;
144   virtual void OnSpeechRecognitionEngineError(
145       const SpeechRecognitionError& error) OVERRIDE;
146
      // Static, hence shared by every instance. NOTE(review): presumably set by
      // SetAudioManagerForTesting() -- confirm.
147   static media::AudioManager* audio_manager_for_tests_;
148
      // Owning pointer to the recognition engine.
149   scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
150   Endpointer endpointer_;
      // Ref-counted handle to the audio input controller.
151   scoped_refptr<media::AudioInputController> audio_controller_;
152   scoped_ptr<media::AudioLog> audio_log_;
      // NOTE(review): presumably the sample count behind GetElapsedTimeMs() --
      // confirm.
153   int num_samples_recorded_;
154   float audio_level_;
      // NOTE(review): presumably guards DispatchEvent() against re-entrancy --
      // confirm.
155   bool is_dispatching_event_;
156   bool provisional_results_;
      // Current state of the recognizer FSM.
157   FSMState state_;
      // NOTE(review): std::string is used in this header but <string> is not
      // among its direct includes; presumably it arrives transitively -- confirm.
158   std::string device_id_;
159
      // Nested helper class; only forward-declared in this header.
160   class OnDataConverter;
161
162   // Converts data between native input format and a WebSpeech specific
163   // output format.
164   scoped_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_;
165
166   DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
167 };
168
169 }  // namespace content
170
171 #endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_