/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.deprecated.voice;

import com.sugree.inputmethod.latin.R;
import com.sugree.inputmethod.latin.EditingUtils;
import com.sugree.inputmethod.latin.LatinImeLogger;
import com.sugree.inputmethod.latin.StaticInnerHandlerWrapper;

import android.content.ContentResolver;
import android.content.Context;
import android.content.Intent;
import android.content.res.Configuration;
import android.os.Build;
import android.os.Bundle;
import android.os.Message;
import android.os.Parcelable;
import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import android.util.Log;
import android.view.View;
import android.view.View.OnClickListener;
import android.view.inputmethod.InputConnection;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

/**
 * Speech recognition input, including both user interface and a background
 * process to stream audio to the network recognizer. This class supplies a
 * View (getView()), which it updates as recognition occurs. The user of this
 * class is responsible for making the view visible to the user, as well as
 * handling various events returned through UiListener.
 */
public class VoiceInput implements OnClickListener {
    private static final String TAG = "VoiceInput";
    private static final String EXTRA_RECOGNITION_CONTEXT =
            "android.speech.extras.RECOGNITION_CONTEXT";
    private static final String EXTRA_CALLING_PACKAGE = "calling_package";
    private static final String EXTRA_ALTERNATES = "android.speech.extra.ALTERNATES";
    private static final int MAX_ALT_LIST_LENGTH = 6;
    private static boolean DBG = LatinImeLogger.sDBG;

    private static final String DEFAULT_RECOMMENDED_PACKAGES =
            "com.android.mms " +
            "com.google.android.gm " +
            "com.google.android.talk " +
            "com.google.android.apps.googlevoice " +
            "com.android.email " +
            "com.android.browser ";

    // WARNING! Before enabling this, fix the problem with calling getExtractedText() in
    // landscape view. It causes extracted-text updates to be rejected due to a token mismatch.
    public static boolean ENABLE_WORD_CORRECTIONS = true;

    // Dummy word suggestion which means "delete current word"
    public static final String DELETE_SYMBOL = " \u00D7 ";  // times symbol

    private Whitelist mRecommendedList;
    private Whitelist mBlacklist;

    private VoiceInputLogger mLogger;

    // Names of a few extras defined in VoiceSearch's RecognitionController.
    // Note: the version of VoiceSearch that shipped in Froyo returns the raw
    // RecognitionClientAlternates protocol buffer under the key "alternates",
    // so a VoiceSearch Market update must be installed on Froyo devices in
    // order to see alternatives.
    private static final String ALTERNATES_BUNDLE = "alternates_bundle";
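
    // A sketch of how the alternates bundle is nested, inferred from the keys in
    // AlternatesBundleKeys below and the way onResults() consumes them (the exact
    // layout is defined by the VoiceSearch app, not by this class):
    //
    //   ALTERNATES_BUNDLE ("alternates_bundle") : Bundle
    //       "spans" : Bundle
    //           "<start>:<length>" : Bundle      // one sub-bundle per recognized span
    //               "start"      : int           // word offset into the top result
    //               "length"     : int           // span length, in words
    //               "alternates" : Parcelable[]  // Bundles carrying "text"/"confidence"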
@SuppressWarnings("unused") private static final class AlternatesBundleKeys { public static final String ALTERNATES = "alternates"; public static final String CONFIDENCE = "confidence"; public static final String LENGTH = "length"; public static final String MAX_SPAN_LENGTH = "max_span_length"; public static final String SPANS = "spans"; public static final String SPAN_KEY_DELIMITER = ":"; public static final String START = "start"; public static final String TEXT = "text"; } // Names of a few intent extras defined in VoiceSearch's RecognitionService. // These let us tweak the endpointer parameters. private static final String EXTRA_SPEECH_MINIMUM_LENGTH_MILLIS = "android.speech.extras.SPEECH_INPUT_MINIMUM_LENGTH_MILLIS"; private static final String EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS = "android.speech.extras.SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS"; private static final String EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS = "android.speech.extras.SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS"; // The usual endpointer default value for input complete silence length is 0.5 seconds, // but that's used for things like voice search. For dictation-like voice input like this, // we go with a more liberal value of 1 second. This value will only be used if a value // is not provided from Gservices. private static final String INPUT_COMPLETE_SILENCE_LENGTH_DEFAULT_VALUE_MILLIS = "1000"; // Used to record part of that state for logging purposes. public static final int DEFAULT = 0; public static final int LISTENING = 1; public static final int WORKING = 2; public static final int ERROR = 3; private int mAfterVoiceInputDeleteCount = 0; private int mAfterVoiceInputInsertCount = 0; private int mAfterVoiceInputInsertPunctuationCount = 0; private int mAfterVoiceInputCursorPos = 0; private int mAfterVoiceInputSelectionSpan = 0; private int mState = DEFAULT; private final static int MSG_RESET = 1; private final UIHandler mHandler = new UIHandler(this); private static class UIHandler extends StaticInnerHandlerWrapper<VoiceInput> { public UIHandler(VoiceInput outerInstance) { super(outerInstance); } @Override public void handleMessage(Message msg) { if (msg.what == MSG_RESET) { final VoiceInput voiceInput = getOuterInstance(); voiceInput.mState = DEFAULT; voiceInput.mRecognitionView.finish(); voiceInput.mUiListener.onCancelVoice(); } } }; /** * Events relating to the recognition UI. You must implement these. */ public interface UiListener { /** * @param recognitionResults a set of transcripts for what the user * spoke, sorted by likelihood. */ public void onVoiceResults( List<String> recognitionResults, Map<String, List<CharSequence>> alternatives); /** * Called when the user cancels speech recognition. */ public void onCancelVoice(); } private SpeechRecognizer mSpeechRecognizer; private RecognitionListener mRecognitionListener; private RecognitionView mRecognitionView; private UiListener mUiListener; private Context mContext; /** * @param context the service or activity in which we're running. * @param uiHandler object to receive events from VoiceInput. 
     */
    public VoiceInput(Context context, UiListener uiHandler) {
        mLogger = VoiceInputLogger.getLogger(context);
        mRecognitionListener = new ImeRecognitionListener();
        mSpeechRecognizer = SpeechRecognizer.createSpeechRecognizer(context);
        mSpeechRecognizer.setRecognitionListener(mRecognitionListener);
        mUiListener = uiHandler;
        mContext = context;
        newView();

        String recommendedPackages = SettingsUtil.getSettingsString(
                context.getContentResolver(),
                SettingsUtil.LATIN_IME_VOICE_INPUT_RECOMMENDED_PACKAGES,
                DEFAULT_RECOMMENDED_PACKAGES);

        mRecommendedList = new Whitelist();
        for (String recommendedPackage : recommendedPackages.split("\\s+")) {
            mRecommendedList.addApp(recommendedPackage);
        }

        mBlacklist = new Whitelist();
        mBlacklist.addApp("com.google.android.setupwizard");
    }

    public void setCursorPos(int pos) {
        mAfterVoiceInputCursorPos = pos;
    }

    public int getCursorPos() {
        return mAfterVoiceInputCursorPos;
    }

    public void setSelectionSpan(int span) {
        mAfterVoiceInputSelectionSpan = span;
    }

    public int getSelectionSpan() {
        return mAfterVoiceInputSelectionSpan;
    }

    public void incrementTextModificationDeleteCount(int count) {
        mAfterVoiceInputDeleteCount += count;
        // Send up intents for other text modification types
        if (mAfterVoiceInputInsertCount > 0) {
            logTextModifiedByTypingInsertion(mAfterVoiceInputInsertCount);
            mAfterVoiceInputInsertCount = 0;
        }
        if (mAfterVoiceInputInsertPunctuationCount > 0) {
            logTextModifiedByTypingInsertionPunctuation(mAfterVoiceInputInsertPunctuationCount);
            mAfterVoiceInputInsertPunctuationCount = 0;
        }
    }

    public void incrementTextModificationInsertCount(int count) {
        mAfterVoiceInputInsertCount += count;
        if (mAfterVoiceInputSelectionSpan > 0) {
            // If text was highlighted before inserting the char, count this as
            // a delete.
            mAfterVoiceInputDeleteCount += mAfterVoiceInputSelectionSpan;
        }
        // Send up intents for other text modification types
        if (mAfterVoiceInputDeleteCount > 0) {
            logTextModifiedByTypingDeletion(mAfterVoiceInputDeleteCount);
            mAfterVoiceInputDeleteCount = 0;
        }
        if (mAfterVoiceInputInsertPunctuationCount > 0) {
            logTextModifiedByTypingInsertionPunctuation(mAfterVoiceInputInsertPunctuationCount);
            mAfterVoiceInputInsertPunctuationCount = 0;
        }
    }

    public void incrementTextModificationInsertPunctuationCount(int count) {
        mAfterVoiceInputInsertPunctuationCount += count;
        if (mAfterVoiceInputSelectionSpan > 0) {
            // If text was highlighted before inserting the char, count this as
            // a delete.
            mAfterVoiceInputDeleteCount += mAfterVoiceInputSelectionSpan;
        }
        // Send up intents for aggregated non-punctuation insertions
        if (mAfterVoiceInputDeleteCount > 0) {
            logTextModifiedByTypingDeletion(mAfterVoiceInputDeleteCount);
            mAfterVoiceInputDeleteCount = 0;
        }
        if (mAfterVoiceInputInsertCount > 0) {
            logTextModifiedByTypingInsertion(mAfterVoiceInputInsertCount);
            mAfterVoiceInputInsertCount = 0;
        }
    }

    public void flushAllTextModificationCounters() {
        if (mAfterVoiceInputInsertCount > 0) {
            logTextModifiedByTypingInsertion(mAfterVoiceInputInsertCount);
            mAfterVoiceInputInsertCount = 0;
        }
        if (mAfterVoiceInputDeleteCount > 0) {
            logTextModifiedByTypingDeletion(mAfterVoiceInputDeleteCount);
            mAfterVoiceInputDeleteCount = 0;
        }
        if (mAfterVoiceInputInsertPunctuationCount > 0) {
            logTextModifiedByTypingInsertionPunctuation(mAfterVoiceInputInsertPunctuationCount);
            mAfterVoiceInputInsertPunctuationCount = 0;
        }
    }

    /**
     * The configuration of the IME changed and may have caused the views to be laid out
     * again. Restore the state of the recognition view.
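     *
     * <p>A hypothetical call site (an IME forwarding its own configuration
     * callback; {@code mVoiceInput} is an illustrative field name):
     * <pre>
     * public void onConfigurationChanged(Configuration newConfig) {
     *     super.onConfigurationChanged(newConfig);
     *     mVoiceInput.onConfigurationChanged(newConfig);
     * }
     * </pre>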
     */
    public void onConfigurationChanged(Configuration configuration) {
        mRecognitionView.restoreState();
        mRecognitionView.getView().dispatchConfigurationChanged(configuration);
    }

    /**
     * @return true if field is blacklisted for voice
     */
    public boolean isBlacklistedField(FieldContext context) {
        return mBlacklist.matches(context);
    }

    /**
     * Used to decide whether to show voice input hints for this field, etc.
     *
     * @return true if field is recommended for voice
     */
    public boolean isRecommendedField(FieldContext context) {
        return mRecommendedList.matches(context);
    }

    /**
     * Start listening for speech from the user. This will grab the microphone
     * and start updating the view provided by getView(). It is the caller's
     * responsibility to ensure that the view is visible to the user at this stage.
     *
     * @param context the same FieldContext supplied to voiceIsEnabled()
     * @param swipe whether this voice input was started by swipe, for logging purposes
     */
    public void startListening(FieldContext context, boolean swipe) {
        if (DBG) {
            Log.d(TAG, "startListening: " + context);
        }

        if (mState != DEFAULT) {
            Log.w(TAG, "startListening in the wrong state " + mState);
        }
        // If everything works OK, the voice input should already be in the correct state.
        // As this class can be called by third-party code, we call reset just to be on
        // the safe side.
        reset();

        Locale locale = Locale.getDefault();
        String localeString = locale.getLanguage() + "-" + locale.getCountry();

        mLogger.start(localeString, swipe);

        mState = LISTENING;

        mRecognitionView.showInitializing();
        startListeningAfterInitialization(context);
    }

    /**
     * Called only once the recognition manager's initialization has completed.
     *
     * @param context context with which {@link #startListening(FieldContext, boolean)}
     *   was executed
     */
    private void startListeningAfterInitialization(FieldContext context) {
        Intent intent = makeIntent();
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, "");
        intent.putExtra(EXTRA_RECOGNITION_CONTEXT, context.getBundle());
        intent.putExtra(EXTRA_CALLING_PACKAGE, "VoiceIME");
        intent.putExtra(EXTRA_ALTERNATES, true);
        intent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS,
                SettingsUtil.getSettingsInt(
                        mContext.getContentResolver(),
                        SettingsUtil.LATIN_IME_MAX_VOICE_RESULTS,
                        1));

        // Get endpointer params from Gservices.
        // TODO: Consider caching these values for improved performance on slower devices.
        final ContentResolver cr = mContext.getContentResolver();
        putEndpointerExtra(
                cr,
                intent,
                SettingsUtil.LATIN_IME_SPEECH_MINIMUM_LENGTH_MILLIS,
                EXTRA_SPEECH_MINIMUM_LENGTH_MILLIS,
                null  /* rely on endpointer default */);
        putEndpointerExtra(
                cr,
                intent,
                SettingsUtil.LATIN_IME_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS,
                EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS,
                INPUT_COMPLETE_SILENCE_LENGTH_DEFAULT_VALUE_MILLIS
                /* our default value is different from the endpointer's */);
        putEndpointerExtra(
                cr,
                intent,
                SettingsUtil.LATIN_IME_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS,
                EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS,
                null  /* rely on endpointer default */);

        mSpeechRecognizer.startListening(intent);
    }

    /**
     * Gets the value of the provided Gservices key, attempts to parse it into a long,
     * and, if successful, puts the long value as an extra in the provided intent.
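     *
     * <p>For example, a stored value of "1000" is attached as a long extra of
     * 1000 (milliseconds); a missing or unparsable value adds no extra, so the
     * recognizer falls back to its own endpointer default.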
     */
    private void putEndpointerExtra(ContentResolver cr, Intent i,
            String gservicesKey, String intentExtraKey, String defaultValue) {
        long l = -1;
        String s = SettingsUtil.getSettingsString(cr, gservicesKey, defaultValue);
        if (s != null) {
            try {
                l = Long.valueOf(s);
            } catch (NumberFormatException e) {
                Log.e(TAG, "could not parse value for " + gservicesKey + ": " + s);
            }
        }

        if (l != -1) i.putExtra(intentExtraKey, l);
    }

    public void destroy() {
        mSpeechRecognizer.destroy();
    }

    /**
     * Creates a new instance of the view that is returned by {@link #getView()}.
     * Clients should use this when a previously returned view is stuck in a
     * layout that is being thrown away and a new one is needed to show to the
     * user.
     */
    public void newView() {
        mRecognitionView = new RecognitionView(mContext, this);
    }

    /**
     * @return a view that shows the recognition flow--e.g., the "Speak now" and
     *   "working" dialogs.
     */
    public View getView() {
        return mRecognitionView.getView();
    }

    /**
     * Handle the cancel button.
     */
    @Override
    public void onClick(View view) {
        switch (view.getId()) {
            case R.id.button:
                cancel();
                break;
        }
    }

    public void logTextModifiedByTypingInsertion(int length) {
        mLogger.textModifiedByTypingInsertion(length);
    }

    public void logTextModifiedByTypingInsertionPunctuation(int length) {
        mLogger.textModifiedByTypingInsertionPunctuation(length);
    }

    public void logTextModifiedByTypingDeletion(int length) {
        mLogger.textModifiedByTypingDeletion(length);
    }

    public void logTextModifiedByChooseSuggestion(String suggestion, int index,
            String wordSeparators, InputConnection ic) {
        String wordToBeReplaced = EditingUtils.getWordAtCursor(ic, wordSeparators);
        // If we enable phrase-based alternatives, only send up the first word
        // in suggestion and wordToBeReplaced.
        mLogger.textModifiedByChooseSuggestion(suggestion.length(), wordToBeReplaced.length(),
                index, wordToBeReplaced, suggestion);
    }

    public void logKeyboardWarningDialogShown() {
        mLogger.keyboardWarningDialogShown();
    }

    public void logKeyboardWarningDialogDismissed() {
        mLogger.keyboardWarningDialogDismissed();
    }

    public void logKeyboardWarningDialogOk() {
        mLogger.keyboardWarningDialogOk();
    }

    public void logKeyboardWarningDialogCancel() {
        mLogger.keyboardWarningDialogCancel();
    }

    public void logSwipeHintDisplayed() {
        mLogger.swipeHintDisplayed();
    }

    public void logPunctuationHintDisplayed() {
        mLogger.punctuationHintDisplayed();
    }

    public void logVoiceInputDelivered(int length) {
        mLogger.voiceInputDelivered(length);
    }

    public void logInputEnded() {
        mLogger.inputEnded();
    }

    public void flushLogs() {
        mLogger.flush();
    }

    private static Intent makeIntent() {
        Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);

        // On Cupcake, use VoiceIMEHelper since VoiceSearch doesn't support it.
        // On Donut, always use VoiceSearch, since VoiceIMEHelper and
        // VoiceSearch may conflict.
        if (Build.VERSION.RELEASE.equals("1.5")) {
            intent = intent.setClassName(
                    "com.google.android.voiceservice",
                    "com.google.android.voiceservice.IMERecognitionService");
        } else {
            intent = intent.setClassName(
                    "com.google.android.voicesearch",
                    "com.google.android.voicesearch.RecognitionService");
        }

        return intent;
    }

    /**
     * Reset the current voice recognition.
     */
    public void reset() {
        if (mState != DEFAULT) {
            mState = DEFAULT;

            // Remove all pending tasks (e.g., timers to cancel voice input)
            mHandler.removeMessages(MSG_RESET);

            mSpeechRecognizer.cancel();
            mRecognitionView.finish();
        }
    }

    /**
     * Cancel in-progress speech recognition.
     */
    public void cancel() {
        switch (mState) {
            case LISTENING:
                mLogger.cancelDuringListening();
                break;
            case WORKING:
                mLogger.cancelDuringWorking();
                break;
            case ERROR:
                mLogger.cancelDuringError();
                break;
        }

        reset();
        mUiListener.onCancelVoice();
    }

    private int getErrorStringId(int errorType, boolean endpointed) {
        switch (errorType) {
            // We use CLIENT_ERROR to signify that voice search is not available on the device.
            case SpeechRecognizer.ERROR_CLIENT:
                return R.string.voice_not_installed;
            case SpeechRecognizer.ERROR_NETWORK:
                return R.string.voice_network_error;
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                return endpointed
                        ? R.string.voice_network_error
                        : R.string.voice_too_much_speech;
            case SpeechRecognizer.ERROR_AUDIO:
                return R.string.voice_audio_error;
            case SpeechRecognizer.ERROR_SERVER:
                return R.string.voice_server_error;
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                return R.string.voice_speech_timeout;
            case SpeechRecognizer.ERROR_NO_MATCH:
                return R.string.voice_no_match;
            default:
                return R.string.voice_error;
        }
    }

    private void onError(int errorType, boolean endpointed) {
        Log.i(TAG, "error " + errorType);
        mLogger.error(errorType);
        onError(mContext.getString(getErrorStringId(errorType, endpointed)));
    }

    private void onError(String error) {
        mState = ERROR;
        mRecognitionView.showError(error);
        // Wait a couple of seconds and then automatically dismiss the message.
        mHandler.sendMessageDelayed(Message.obtain(mHandler, MSG_RESET), 2000);
    }

    private class ImeRecognitionListener implements RecognitionListener {
        // Waveform data
        final ByteArrayOutputStream mWaveBuffer = new ByteArrayOutputStream();
        int mSpeechStart;
        private boolean mEndpointed = false;

        @Override
        public void onReadyForSpeech(Bundle noiseParams) {
            mRecognitionView.showListening();
        }

        @Override
        public void onBeginningOfSpeech() {
            mEndpointed = false;
            mSpeechStart = mWaveBuffer.size();
        }

        @Override
        public void onRmsChanged(float rmsdB) {
            mRecognitionView.updateVoiceMeter(rmsdB);
        }

        @Override
        public void onBufferReceived(byte[] buf) {
            try {
                mWaveBuffer.write(buf);
            } catch (IOException e) {
                // ignore
            }
        }

        @Override
        public void onEndOfSpeech() {
            mEndpointed = true;
            mState = WORKING;
            mRecognitionView.showWorking(mWaveBuffer, mSpeechStart, mWaveBuffer.size());
        }

        @Override
        public void onError(int errorType) {
            mState = ERROR;
            VoiceInput.this.onError(errorType, mEndpointed);
        }

        @Override
        public void onResults(Bundle resultsBundle) {
            List<String> results = resultsBundle
                    .getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
            // A VoiceSearch Market update is needed for Froyo IME clients to access the
            // alternates bundle.
            // TODO: verify this.
            Bundle alternatesBundle = resultsBundle.getBundle(ALTERNATES_BUNDLE);
            mState = DEFAULT;

            final Map<String, List<CharSequence>> alternatives =
                    new HashMap<String, List<CharSequence>>();
            if (ENABLE_WORD_CORRECTIONS && alternatesBundle != null && results.size() > 0) {
                // Use the top recognition result to map each alternative's start:length
                // to a word.
                String[] words = results.get(0).split(" ");
                Bundle spansBundle = alternatesBundle.getBundle(AlternatesBundleKeys.SPANS);
                for (String key : spansBundle.keySet()) {
                    // Get the word to which these alternates correspond.
                    Bundle spanBundle = spansBundle.getBundle(key);
                    int start = spanBundle.getInt(AlternatesBundleKeys.START);
                    int length = spanBundle.getInt(AlternatesBundleKeys.LENGTH);
                    // Only keep single-word alternatives.
                    if (length == 1 && start < words.length) {
                        // Get the alternatives associated with the span.
                        // If a word appears twice in a recognition result,
                        // concatenate the alternatives for the word.
                        List<CharSequence> altList = alternatives.get(words[start]);
                        if (altList == null) {
                            altList = new ArrayList<CharSequence>();
                            alternatives.put(words[start], altList);
                        }
                        Parcelable[] alternatesArr = spanBundle
                                .getParcelableArray(AlternatesBundleKeys.ALTERNATES);
                        for (int j = 0; j < alternatesArr.length
                                && altList.size() < MAX_ALT_LIST_LENGTH; j++) {
                            Bundle alternateBundle = (Bundle) alternatesArr[j];
                            String alternate =
                                    alternateBundle.getString(AlternatesBundleKeys.TEXT);
                            // Don't allow duplicates in the alternates list.
                            if (!altList.contains(alternate)) {
                                altList.add(alternate);
                            }
                        }
                    }
                }
            }

            if (results.size() > 5) {
                results = results.subList(0, 5);
            }
            mUiListener.onVoiceResults(results, alternatives);
            mRecognitionView.finish();
        }

        @Override
        public void onPartialResults(final Bundle partialResults) {
            // currently - do nothing
        }

        @Override
        public void onEvent(int eventType, Bundle params) {
            // do nothing - reserved for events that might be added in the future
        }
    }
}