1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 package org.chromium.content.browser;
7 import android.content.ComponentName;
8 import android.content.Context;
9 import android.content.Intent;
10 import android.content.pm.PackageManager;
11 import android.content.pm.PackageManager.NameNotFoundException;
12 import android.content.pm.ResolveInfo;
13 import android.content.pm.ServiceInfo;
14 import android.os.Bundle;
15 import android.speech.RecognitionListener;
16 import android.speech.RecognitionService;
17 import android.speech.RecognizerIntent;
18 import android.speech.SpeechRecognizer;
20 import org.chromium.base.CalledByNative;
21 import org.chromium.base.JNINamespace;
22 import org.chromium.content_public.common.SpeechRecognitionErrorCode;
24 import java.util.ArrayList;
25 import java.util.List;
/**
 * This class uses Android's SpeechRecognizer to perform speech recognition for the Web Speech API
 * on Android. Using Android's platform recognizer offers several benefits, like good quality and
 * good local fallback when no data connection is available.
 */
32 @JNINamespace("content")
33 public class SpeechRecognition {
35 // Constants describing the speech recognition provider we depend on.
36 private static final String PROVIDER_PACKAGE_NAME = "com.google.android.googlequicksearchbox";
37 private static final int PROVIDER_MIN_VERSION = 300207030;
39 // We track the recognition state to remember what events we need to send when recognition is
40 // being aborted. Once Android's recognizer is cancelled, its listener won't yield any more
41 // events, but we still need to call OnSoundEnd and OnAudioEnd if corresponding On*Start were
43 private static final int STATE_IDLE = 0;
44 private static final int STATE_AWAITING_SPEECH = 1;
45 private static final int STATE_CAPTURING_SPEECH = 2;
48 // The speech recognition provider (if any) matching PROVIDER_PACKAGE_NAME and
49 // PROVIDER_MIN_VERSION as selected by initialize().
50 private static ComponentName sRecognitionProvider;
52 private final Context mContext;
53 private final Intent mIntent;
54 private final RecognitionListener mListener;
55 private SpeechRecognizer mRecognizer;
57 // Native pointer to C++ SpeechRecognizerImplAndroid.
58 private long mNativeSpeechRecognizerImplAndroid;
60 // Remember if we are using continuous recognition.
61 private boolean mContinuous;
63 // Internal class to handle events from Android's SpeechRecognizer and route them to native.
64 class Listener implements RecognitionListener {
67 public void onBeginningOfSpeech() {
68 mState = STATE_CAPTURING_SPEECH;
69 nativeOnSoundStart(mNativeSpeechRecognizerImplAndroid);
73 public void onBufferReceived(byte[] buffer) { }
76 public void onEndOfSpeech() {
77 // Ignore onEndOfSpeech in continuous mode to let terminate() take care of ending
78 // events. The Android API documentation is vague as to when onEndOfSpeech is called in
79 // continuous mode, whereas the Web Speech API defines a stronger semantic on the
80 // equivalent (onsoundend) event. Thus, the only way to provide a valid onsoundend
81 // event is to trigger it when the last result is received or the session is aborted.
83 nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
84 // Since Android doesn't have a dedicated event for when audio capture is finished,
85 // we fire it after speech has ended.
86 nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
92 public void onError(int error) {
93 int code = SpeechRecognitionErrorCode.NONE;
95 // Translate Android SpeechRecognizer errors to Web Speech API errors.
97 case SpeechRecognizer.ERROR_AUDIO:
98 code = SpeechRecognitionErrorCode.AUDIO;
100 case SpeechRecognizer.ERROR_CLIENT:
101 code = SpeechRecognitionErrorCode.ABORTED;
103 case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
104 case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
105 code = SpeechRecognitionErrorCode.NOT_ALLOWED;
107 case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
108 case SpeechRecognizer.ERROR_NETWORK:
109 case SpeechRecognizer.ERROR_SERVER:
110 code = SpeechRecognitionErrorCode.NETWORK;
112 case SpeechRecognizer.ERROR_NO_MATCH:
113 code = SpeechRecognitionErrorCode.NO_MATCH;
115 case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
116 code = SpeechRecognitionErrorCode.NO_SPEECH;
127 public void onEvent(int event, Bundle bundle) { }
130 public void onPartialResults(Bundle bundle) {
131 handleResults(bundle, true);
135 public void onReadyForSpeech(Bundle bundle) {
136 mState = STATE_AWAITING_SPEECH;
137 nativeOnAudioStart(mNativeSpeechRecognizerImplAndroid);
141 public void onResults(Bundle bundle) {
142 handleResults(bundle, false);
143 // We assume that onResults is called only once, at the end of a session, thus we
144 // terminate. If one day the recognition provider changes dictation mode behavior to
145 // call onResults several times, we should terminate only if (!mContinuous).
146 terminate(SpeechRecognitionErrorCode.NONE);
150 public void onRmsChanged(float rms) { }
152 private void handleResults(Bundle bundle, boolean provisional) {
153 if (mContinuous && provisional) {
154 // In continuous mode, Android's recognizer sends final results as provisional.
158 ArrayList<String> list = bundle.getStringArrayList(
159 SpeechRecognizer.RESULTS_RECOGNITION);
160 String[] results = list.toArray(new String[list.size()]);
162 float[] scores = bundle.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);
164 nativeOnRecognitionResults(mNativeSpeechRecognizerImplAndroid,
172 * This method must be called before any instance of SpeechRecognition can be created. It will
173 * query Android's package manager to find a suitable speech recognition provider that supports
174 * continuous recognition.
176 public static boolean initialize(Context context) {
177 if (!SpeechRecognizer.isRecognitionAvailable(context))
180 PackageManager pm = context.getPackageManager();
181 Intent intent = new Intent(RecognitionService.SERVICE_INTERFACE);
182 final List<ResolveInfo> list = pm.queryIntentServices(intent, PackageManager.GET_SERVICES);
184 for (ResolveInfo resolve : list) {
185 ServiceInfo service = resolve.serviceInfo;
187 if (!service.packageName.equals(PROVIDER_PACKAGE_NAME))
192 versionCode = pm.getPackageInfo(service.packageName, 0).versionCode;
193 } catch (NameNotFoundException e) {
197 if (versionCode < PROVIDER_MIN_VERSION)
200 sRecognitionProvider = new ComponentName(service.packageName, service.name);
205 // If we reach this point, we failed to find a suitable recognition provider.
209 private SpeechRecognition(final Context context, long nativeSpeechRecognizerImplAndroid) {
212 mNativeSpeechRecognizerImplAndroid = nativeSpeechRecognizerImplAndroid;
213 mListener = new Listener();
214 mIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
216 if (sRecognitionProvider != null) {
217 mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext, sRecognitionProvider);
219 // It is possible to force-enable the speech recognition web platform feature (using a
220 // command-line flag) even if initialize() failed to find the PROVIDER_PACKAGE_NAME
221 // provider, in which case the first available speech recognition provider is used.
222 // Caveat: Continuous mode may not work as expected with a different provider.
223 mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext);
226 mRecognizer.setRecognitionListener(mListener);
229 // This function destroys everything when recognition is done, taking care to properly tear
230 // down by calling On{Sound,Audio}End if corresponding On{Audio,Sound}Start were called.
231 private void terminate(int error) {
233 if (mState != STATE_IDLE) {
234 if (mState == STATE_CAPTURING_SPEECH) {
235 nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid);
237 nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid);
241 if (error != SpeechRecognitionErrorCode.NONE)
242 nativeOnRecognitionError(mNativeSpeechRecognizerImplAndroid, error);
244 mRecognizer.destroy();
246 nativeOnRecognitionEnd(mNativeSpeechRecognizerImplAndroid);
247 mNativeSpeechRecognizerImplAndroid = 0;
251 private static SpeechRecognition createSpeechRecognition(
252 Context context, long nativeSpeechRecognizerImplAndroid) {
253 return new SpeechRecognition(context, nativeSpeechRecognizerImplAndroid);
257 private void startRecognition(String language, boolean continuous, boolean interimResults) {
258 if (mRecognizer == null)
261 mContinuous = continuous;
262 mIntent.putExtra("android.speech.extra.DICTATION_MODE", continuous);
263 mIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, language);
264 mIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, interimResults);
265 mRecognizer.startListening(mIntent);
269 private void abortRecognition() {
270 if (mRecognizer == null)
273 mRecognizer.cancel();
274 terminate(SpeechRecognitionErrorCode.ABORTED);
278 private void stopRecognition() {
279 if (mRecognizer == null)
283 mRecognizer.stopListening();
286 // Native JNI calls to content/browser/speech/speech_recognizer_impl_android.cc
287 private native void nativeOnAudioStart(long nativeSpeechRecognizerImplAndroid);
288 private native void nativeOnSoundStart(long nativeSpeechRecognizerImplAndroid);
289 private native void nativeOnSoundEnd(long nativeSpeechRecognizerImplAndroid);
290 private native void nativeOnAudioEnd(long nativeSpeechRecognizerImplAndroid);
291 private native void nativeOnRecognitionResults(long nativeSpeechRecognizerImplAndroid,
294 boolean provisional);
295 private native void nativeOnRecognitionError(long nativeSpeechRecognizerImplAndroid, int error);
296 private native void nativeOnRecognitionEnd(long nativeSpeechRecognizerImplAndroid);