- add sources.
[platform/framework/web/crosswalk.git] / src / content / browser / speech / speech_recognizer_impl.h
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
7
8 #include "base/basictypes.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "content/browser/speech/endpointer/endpointer.h"
11 #include "content/browser/speech/speech_recognition_engine.h"
12 #include "content/browser/speech/speech_recognizer.h"
13 #include "content/public/common/speech_recognition_error.h"
14 #include "content/public/common/speech_recognition_result.h"
15 #include "media/audio/audio_input_controller.h"
16 #include "net/url_request/url_request_context_getter.h"
17
18 namespace media {
   // Forward declaration is sufficient here: this header only refers to
   // media::AudioManager through pointers (SetAudioManagerForTesting() and
   // |audio_manager_for_tests_|).
19 class AudioManager;
20 }
21
22 namespace content {
23
24 class SpeechRecognitionEventListener;  // Used below only as a pointer (constructor argument).
25
26 // Handles speech recognition for a session (identified by |session_id|), taking
27 // care of audio capture, silence detection/endpointer and interaction with the
28 // SpeechRecognitionEngine.
   //
   // The class is organised as a finite state machine: external notifications
   // (SpeechRecognizer calls, AudioInputController callbacks, engine delegate
   // callbacks) are described by an FSMEventArgs, pushed through DispatchEvent(),
   // and mapped to one of the transition handlers declared below by
   // ExecuteTransitionAndGetNextState().
   //
   // The class is non-copyable (DISALLOW_COPY_AND_ASSIGN) and its destructor is
   // private.
   // NOTE(review): the private destructor suggests lifetime is managed by
   // ref-counting in the SpeechRecognizer base -- confirm in speech_recognizer.h.
   // NOTE(review): this header uses std::string, scoped_refptr and CONTENT_EXPORT
   // without directly including <string>, base/memory/ref_counted.h or
   // content/common/content_export.h; presumably they arrive transitively.
29 class CONTENT_EXPORT SpeechRecognizerImpl
30     : public SpeechRecognizer,
31       public media::AudioInputController::EventHandler,
32       public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {
33  public:
     // Audio capture and endpointing parameters. They are only declared here;
     // their values are defined out of line and are not visible in this header.
     // NOTE(review): the "Ms" suffix presumably means milliseconds -- confirm
     // against the definitions.
34   static const int kAudioSampleRate;
35   static const media::ChannelLayout kChannelLayout;
36   static const int kNumBitsPerAudioSample;
37   static const int kNoSpeechTimeoutMs;
38   static const int kEndpointerEstimationTimeMs;
39
     // Test-only hook that takes a media::AudioManager.
     // NOTE(review): presumably stores it in |audio_manager_for_tests_| so that
     // tests can substitute the audio manager -- confirm in the .cc file.
40   static void SetAudioManagerForTesting(media::AudioManager* audio_manager);
41
     // NOTE(review): |listener| and |session_id| are not stored in any member
     // declared in this class; presumably they are forwarded to the
     // SpeechRecognizer base. |engine| is presumably adopted by
     // |recognition_engine_| (a scoped_ptr), i.e. ownership is transferred --
     // confirm both in the .cc file.
42   SpeechRecognizerImpl(SpeechRecognitionEventListener* listener,
43                        int session_id,
44                        bool is_single_shot,
45                        SpeechRecognitionEngine* engine);
46
     // Overridden virtuals. NOTE(review): by elimination these presumably belong
     // to the SpeechRecognizer interface (the overrides of the other two bases
     // are grouped in the private section below).
47   virtual void StartRecognition(const std::string& device_id) OVERRIDE;
48   virtual void AbortRecognition() OVERRIDE;
49   virtual void StopAudioCapture() OVERRIDE;
50   virtual bool IsActive() const OVERRIDE;
51   virtual bool IsCapturingAudio() const OVERRIDE;
     // Read-only access to a SpeechRecognitionEngine (presumably the one held in
     // |recognition_engine_|).
52   const SpeechRecognitionEngine& recognition_engine() const;
53
54  private:
     // Grants the unit-test fixture access to the private members below.
55   friend class SpeechRecognizerTest;
56
     // States of the recognizer FSM. STATE_MAX_VALUE aliases the last real state
     // (STATE_ENDED), so it tracks the highest valid value.
57   enum FSMState {
58     STATE_IDLE = 0,
59     STATE_STARTING,
60     STATE_ESTIMATING_ENVIRONMENT,
61     STATE_WAITING_FOR_SPEECH,
62     STATE_RECOGNIZING,
63     STATE_WAITING_FINAL_RESULT,
64     STATE_ENDED,
65     STATE_MAX_VALUE = STATE_ENDED
66   };
67
     // Events that can be fed into the recognizer FSM. EVENT_MAX_VALUE aliases
     // the last real event (EVENT_AUDIO_ERROR).
68   enum FSMEvent {
69     EVENT_ABORT = 0,
70     EVENT_START,
71     EVENT_STOP_CAPTURE,
72     EVENT_AUDIO_DATA,
73     EVENT_ENGINE_RESULT,
74     EVENT_ENGINE_ERROR,
75     EVENT_AUDIO_ERROR,
76     EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
77   };
78
     // Bundle describing one FSM event: the event type plus its payload.
     // NOTE(review): presumably only the payload field matching |event| is
     // meaningful (audio_data for EVENT_AUDIO_DATA, engine_results for
     // EVENT_ENGINE_RESULT, engine_error for EVENT_ENGINE_ERROR) -- confirm at
     // the construction sites.
79   struct FSMEventArgs {
80     explicit FSMEventArgs(FSMEvent event_value);
81     ~FSMEventArgs();
82
83     FSMEvent event;
84     scoped_refptr<AudioChunk> audio_data;
85     SpeechRecognitionResults engine_results;
86     SpeechRecognitionError engine_error;
87   };
88
     // Private: outside code cannot destroy an instance directly.
89   virtual ~SpeechRecognizerImpl();
90
91   // Entry point for pushing any new external event into the recognizer FSM.
92   void DispatchEvent(const FSMEventArgs& event_args);
93
94   // Defines the behavior of the recognizer FSM, selecting the appropriate
95   // transition according to the current state and event.
96   FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
97
98   // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
99   void ProcessAudioPipeline(const AudioChunk& raw_audio);
100
101   // The methods below handle transitions of the recognizer FSM.
      // Each one returns an FSMState (presumably the state the FSM moves to).
      // All take the triggering FSMEventArgs except Abort(), which takes the
      // error to report instead. DoNothing() is the only one declared const.
102   FSMState StartRecording(const FSMEventArgs& event_args);
103   FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
104   FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
105   FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
106   FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
107   FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
108   FSMState ProcessFinalResult(const FSMEventArgs& event_args);
109   FSMState AbortSilently(const FSMEventArgs& event_args);
110   FSMState AbortWithError(const FSMEventArgs& event_args);
111   FSMState Abort(const SpeechRecognitionError& error);
112   FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
113   FSMState DoNothing(const FSMEventArgs& event_args) const;
114   FSMState NotFeasible(const FSMEventArgs& event_args);
115
116   // Returns the time span of captured audio samples since the start of capture.
117   int GetElapsedTimeMs() const;
118
119   // Calculates the input volume to be displayed in the UI, triggering the
120   // OnAudioLevelsChange event accordingly.
      // NOTE(review): |rms| is a float passed by const reference; passing by
      // value would be the usual form, but the signature must stay in sync with
      // the out-of-line definition.
121   void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
122
123   void CloseAudioControllerAsynchronously();
124
125   // Callback called on IO thread by audio_controller->Close().
126   void OnAudioClosed(media::AudioInputController*);
127
128   // AudioInputController::EventHandler methods.
      // OnCreated() and OnRecording() are deliberate no-ops (empty inline bodies).
129   virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
130   virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
131   virtual void OnError(media::AudioInputController* controller) OVERRIDE;
132   virtual void OnData(media::AudioInputController* controller,
133                       const uint8* data, uint32 size) OVERRIDE;
134
135   // SpeechRecognitionEngineDelegate methods.
136   virtual void OnSpeechRecognitionEngineResults(
137       const SpeechRecognitionResults& results) OVERRIDE;
138   virtual void OnSpeechRecognitionEngineError(
139       const SpeechRecognitionError& error) OVERRIDE;
140
      // Static, so shared by all instances; see SetAudioManagerForTesting().
141   static media::AudioManager* audio_manager_for_tests_;
142
      // Per-instance state.
      // NOTE(review): the roles below are inferred from names and types only --
      // confirm against the .cc file:
      //   recognition_engine_   - engine owned by this object (scoped_ptr).
      //   endpointer_           - silence/speech endpoint detector.
      //   audio_controller_     - ref-counted audio input controller.
      //   num_samples_recorded_ - presumably the basis for GetElapsedTimeMs().
      //   audio_level_          - presumably the last level reported by
      //                           UpdateSignalAndNoiseLevels().
      //   is_dispatching_event_ - presumably a guard used by DispatchEvent().
      //   is_single_shot_       - presumably the constructor's |is_single_shot|.
      //   state_                - current FSM state.
      //   device_id_            - presumably the id given to StartRecognition().
143   scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
144   Endpointer endpointer_;
145   scoped_refptr<media::AudioInputController> audio_controller_;
146   int num_samples_recorded_;
147   float audio_level_;
148   bool is_dispatching_event_;
149   bool is_single_shot_;
150   FSMState state_;
151   std::string device_id_;
152
      // Nested helper class; only forward-declared here, defined elsewhere.
153   class OnDataConverter;
154
155   // Converts data between native input format and a WebSpeech specific
156   // output format.
157   scoped_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_;
158
159   DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
160 };
161
162 }  // namespace content
163
164 #endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_