1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 package org.chromium.content.browser;
7 import android.content.ComponentName;
8 import android.content.Context;
9 import android.content.Intent;
10 import android.content.pm.PackageManager;
11 import android.content.pm.PackageManager.NameNotFoundException;
12 import android.content.pm.ResolveInfo;
13 import android.content.pm.ServiceInfo;
14 import android.os.Bundle;
15 import android.speech.RecognitionListener;
16 import android.speech.RecognitionService;
17 import android.speech.RecognizerIntent;
18 import android.speech.SpeechRecognizer;
20 import org.chromium.base.CalledByNative;
21 import org.chromium.base.JNINamespace;
22 import org.chromium.content_public.common.SpeechRecognitionErrorCode;
24 import java.util.ArrayList;
25 import java.util.List;
/**
 * This class uses Android's SpeechRecognizer to perform speech recognition for the Web Speech API
 * on Android. Using Android's platform recognizer offers several benefits, like good quality and
 * good local fallback when no data connection is available.
 */
32 @JNINamespace("content")
33 public class SpeechRecognition {
35 // Constants describing the speech recognition provider we depend on.
36 private static final String PROVIDER_PACKAGE_NAME = "com.google.android.googlequicksearchbox";
37 private static final int PROVIDER_MIN_VERSION = 300207030;
39 // We track the recognition state to remember what events we need to send when recognition is
40 // being aborted. Once Android's recognizer is cancelled, its listener won't yield any more
41 // events, but we still need to call OnSoundEnd and OnAudioEnd if corresponding On*Start were
43 private static final int STATE_IDLE = 0;
44 private static final int STATE_AWAITING_SPEECH = 1;
45 private static final int STATE_CAPTURING_SPEECH = 2;
48 // The speech recognition provider (if any) matching PROVIDER_PACKAGE_NAME and
49 // PROVIDER_MIN_VERSION as selected by initialize().
50 private static ComponentName sRecognitionProvider;
52 private final Context mContext;
53 private final Intent mIntent;
54 private final RecognitionListener mListener;
55 private SpeechRecognizer mRecognizer;
57 // Native pointer to C++ SpeechRecognizerImplAndroid.
58 private long mNativeSpeechRecognizerImplAndroid;
60 // Remember if we are using continuous recognition.
61 private boolean mContinuous;
63 // Internal class to handle events from Android's SpeechRecognizer and route them to native.
64 class Listener implements RecognitionListener {
67 public void onBeginningOfSpeech() {
68 mState = STATE_CAPTURING_SPEECH;
69 nativeOnSoundStart(mNativeSpeechRecognizerImplAndroid);
73 public void onBufferReceived(byte[] buffer) { }
76 public void onEndOfSpeech() {
77 // Ignore onEndOfSpeech in continuous mode to let terminate() take care of ending
78 // events. The Android API documentation is vague as to when onEndOfSpeech is called in
79 // continuous mode, whereas the Web Speech API defines a stronger semantic on the
80 // equivalent (onsoundend) event. Thus, the only way to provide a valid onsoundend
81 // event is to trigger it when the last result is received or the session is aborted.
83 nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
84 // Since Android doesn't have a dedicated event for when audio capture is finished,
85 // we fire it after speech has ended.
86 nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
92 public void onError(int error) {
93 int code = SpeechRecognitionErrorCode.NONE;
95 // Translate Android SpeechRecognizer errors to Web Speech API errors.
97 case SpeechRecognizer.ERROR_AUDIO:
98 code = SpeechRecognitionErrorCode.AUDIO;
100 case SpeechRecognizer.ERROR_CLIENT:
101 code = SpeechRecognitionErrorCode.ABORTED;
103 case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
104 case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
105 code = SpeechRecognitionErrorCode.NOT_ALLOWED;
107 case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
108 case SpeechRecognizer.ERROR_NETWORK:
109 case SpeechRecognizer.ERROR_SERVER:
110 code = SpeechRecognitionErrorCode.NETWORK;
112 case SpeechRecognizer.ERROR_NO_MATCH:
113 code = SpeechRecognitionErrorCode.NO_MATCH;
115 case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
116 code = SpeechRecognitionErrorCode.NO_SPEECH;
127 public void onEvent(int event, Bundle bundle) { }
130 public void onPartialResults(Bundle bundle) {
131 handleResults(bundle, true);
135 public void onReadyForSpeech(Bundle bundle) {
136 mState = STATE_AWAITING_SPEECH;
137 nativeOnAudioStart(mNativeSpeechRecognizerImplAndroid);
141 public void onResults(Bundle bundle) {
142 handleResults(bundle, false);
143 // We assume that onResults is called only once, at the end of a session, thus we
144 // terminate. If one day the recognition provider changes dictation mode behavior to
145 // call onResults several times, we should terminate only if (!mContinuous).
146 terminate(SpeechRecognitionErrorCode.NONE);
150 public void onRmsChanged(float rms) { }
152 private void handleResults(Bundle bundle, boolean provisional) {
153 if (mContinuous && provisional) {
154 // In continuous mode, Android's recognizer sends final results as provisional.
158 ArrayList<String> list = bundle.getStringArrayList(
159 SpeechRecognizer.RESULTS_RECOGNITION);
160 String[] results = list.toArray(new String[list.size()]);
162 float[] scores = bundle.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);
164 nativeOnRecognitionResults(mNativeSpeechRecognizerImplAndroid,
172 * This method must be called before any instance of SpeechRecognition can be created. It will
173 * query Android's package manager to find a suitable speech recognition provider that supports
174 * continuous recognition.
176 public static boolean initialize(Context context) {
177 if (!SpeechRecognizer.isRecognitionAvailable(context))
180 PackageManager pm = context.getPackageManager();
181 Intent intent = new Intent(RecognitionService.SERVICE_INTERFACE);
182 final List<ResolveInfo> list = pm.queryIntentServices(intent, PackageManager.GET_SERVICES);
184 for (ResolveInfo resolve : list) {
185 ServiceInfo service = resolve.serviceInfo;
187 if (!service.packageName.equals(PROVIDER_PACKAGE_NAME))
192 versionCode = pm.getPackageInfo(service.packageName, 0).versionCode;
193 } catch (NameNotFoundException e) {
197 if (versionCode < PROVIDER_MIN_VERSION)
200 sRecognitionProvider = new ComponentName(service.packageName, service.name);
205 // If we reach this point, we failed to find a suitable recognition provider.
209 private SpeechRecognition(final Context context, long nativeSpeechRecognizerImplAndroid) {
212 mNativeSpeechRecognizerImplAndroid = nativeSpeechRecognizerImplAndroid;
213 mListener = new Listener();
214 mIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
216 if (sRecognitionProvider != null) {
217 mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext, sRecognitionProvider);
219 // It is possible to force-enable the speech recognition web platform feature (using a
220 // command-line flag) even if initialize() failed to find the PROVIDER_PACKAGE_NAME
221 // provider, in which case the first available speech recognition provider is used.
222 // Caveat: Continuous mode may not work as expected with a different provider.
223 mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext);
226 mRecognizer.setRecognitionListener(mListener);
229 // This function destroys everything when recognition is done, taking care to properly tear
230 // down by calling On{Sound,Audio}End if corresponding On{Audio,Sound}Start were called.
231 private void terminate(int error) {
233 if (mState != STATE_IDLE) {
234 if (mState == STATE_CAPTURING_SPEECH) {
235 nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
237 nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
241 if (error != SpeechRecognitionErrorCode.NONE)
242 nativeOnRecognitionError(mNativeSpeechRecognizerImplAndroid, error);
244 mRecognizer.destroy();
246 nativeOnRecognitionEnd(mNativeSpeechRecognizerImplAndroid);
247 mNativeSpeechRecognizerImplAndroid = 0;
251 private static SpeechRecognition createSpeechRecognition(
252 Context context, long nativeSpeechRecognizerImplAndroid) {
253 return new SpeechRecognition(context, nativeSpeechRecognizerImplAndroid);
257 private void startRecognition(String language, boolean continuous, boolean interimResults) {
258 if (mRecognizer == null)
261 mContinuous = continuous;
262 mIntent.putExtra("android.speech.extra.DICTATION_MODE", continuous);
263 mIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, language);
264 mIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, interimResults);
265 mRecognizer.startListening(mIntent);
269 private void abortRecognition() {
270 if (mRecognizer == null)
273 mRecognizer.cancel();
274 terminate(SpeechRecognitionErrorCode.ABORTED);
278 private void stopRecognition() {
279 if (mRecognizer == null)
283 mRecognizer.stopListening();
286 // Native JNI calls to content/browser/speech/speech_recognizer_impl_android.cc
287 private native void nativeOnAudioStart(long nativeSpeechRecognizerImplAndroid);
288 private native void nativeOnSoundStart(long nativeSpeechRecognizerImplAndroid);
289 private native void nativeOnSoundEnd(long nativeSpeechRecognizerImplAndroid);
290 private native void nativeOnAudioEnd(long nativeSpeechRecognizerImplAndroid);
291 private native void nativeOnRecognitionResults(long nativeSpeechRecognizerImplAndroid,
294 boolean provisional);
295 private native void nativeOnRecognitionError(long nativeSpeechRecognizerImplAndroid, int error);
296 private native void nativeOnRecognitionEnd(long nativeSpeechRecognizerImplAndroid);