/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.voicedialer;

import android.app.Activity;
import android.app.AlertDialog;
import android.bluetooth.BluetoothAdapter;
import android.bluetooth.BluetoothDevice;
import android.bluetooth.BluetoothHeadset;
import android.bluetooth.BluetoothProfile;
import android.content.BroadcastReceiver;
import android.content.Context;
import android.content.DialogInterface;
import android.content.Intent;
import android.content.IntentFilter;
import android.media.AudioManager;
import android.media.ToneGenerator;
import android.os.Bundle;
import android.os.Handler;
import android.os.PowerManager;
import android.os.PowerManager.WakeLock;
import android.os.SystemProperties;
import android.os.Vibrator;
import android.speech.tts.TextToSpeech;
import android.util.Log;
import android.view.View;
import android.view.WindowManager;
import android.widget.TextView;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;

/**
 * TODO: get rid of the anonymous classes
 *
 * This class is the user interface of the VoiceDialer application.
 * It begins in the INITIALIZING state.
 *
 * INITIALIZING :
 *  This transitions out on events from TTS and the BluetoothHeadset
 *   once TTS is initialized and the SCO channel is set up:
 *     * prompt the user "speak now"
 *     * transition to the SPEAKING_GREETING state
 *  (When Bluetooth is not used, the activity skips TTS entirely and goes
 *   straight to WAITING_FOR_COMMAND.)
 *
 * SPEAKING_GREETING:
 *  This transitions out only on events from TTS or the fallback runnable
 *   once the greeting utterance completes:
 *     * begin listening for the command using the {@link CommandRecognizerEngine}
 *     * transition to the WAITING_FOR_COMMAND state
 *
 * WAITING_FOR_COMMAND :
 *  This transitions out only on events from the recognizer
 *   on RecognitionFailure:
 *     * begin speaking "try again."
 *     * transition to state SPEAKING_TRY_AGAIN
 *   on RecognitionError:
 *     * show the error and exit through SPEAKING_GOODBYE
 *   on RecognitionSuccess:
 *     single result:
 *       * begin speaking the sentence describing the intent
 *       * transition to the SPEAKING_CHOSEN_ACTION state
 *     multiple results:
 *       * begin speaking each of the choices in order
 *       * transition to the SPEAKING_CHOICES state
 *         (or WAITING_FOR_DIALOG_CHOICE when Bluetooth is not used)
 *
 * SPEAKING_TRY_AGAIN:
 *  This transitions out only on events from TTS or the fallback runnable
 *   once the try again utterance completes:
 *     * begin listening for the command using the {@link CommandRecognizerEngine}
 *     * transition to the WAITING_FOR_COMMAND state
 *
 * SPEAKING_CHOSEN_ACTION:
 *  This transitions out only on events from TTS or the fallback runnable
 *   once the utterance completes:
 *     * dispatch the intent that was chosen
 *     * transition to the EXITING state
 *     * finish the activity
 *
 * SPEAKING_CHOICES:
 *  This transitions out only on events from TTS or the fallback runnable
 *   once the utterance completes:
 *     * begin listening for the user's choice using the
 *         {@link PhoneTypeChoiceRecognizerEngine}
 *     * transition to the WAITING_FOR_CHOICE state.
 *
 * WAITING_FOR_CHOICE:
 *  This transitions out only on events from the recognizer
 *   on RecognitionFailure or RecognitionError:
 *     * begin speaking the "goodbye" message
 *     * transition to the SPEAKING_GOODBYE state
 *   on RecognitionSuccess:
 *     if the result is "try again", prompt the user to say a command, begin
 *       listening for the command, and transition back to the
 *       WAITING_FOR_COMMAND state.
 *     if the result is "exit", then begin speaking the "goodbye" message and
 *       transition to the SPEAKING_GOODBYE state.
 *     if the result is a valid choice, begin speaking the action chosen,
 *       initiate the command the user has chosen and exit.
 *     if not a valid choice, speak the "invalid choice" message, begin
 *       speaking the choices in order again, transition to the
 *       SPEAKING_CHOICES state.
 *
 * WAITING_FOR_DIALOG_CHOICE:
 *  Only used when Bluetooth is not in use. The activity waits indefinitely
 *  for the user to pick an entry from (or cancel) the alert dialog.
 *
 * SPEAKING_GOODBYE:
 *  This transitions out only on events from TTS or the fallback runnable
 *   after a time out, finish the activity.
 *
 */
public class VoiceDialerActivity extends Activity {

    private static final String TAG = "VoiceDialerActivity";

    // Intent extras / system property suffixes read by getArg().
    private static final String MICROPHONE_EXTRA = "microphone";
    private static final String CONTACTS_EXTRA = "contacts";

    // Utterance ids handed to the TTS engine.
    private static final String SPEAK_NOW_UTTERANCE = "speak_now";
    private static final String TRY_AGAIN_UTTERANCE = "try_again";
    private static final String CHOSEN_ACTION_UTTERANCE = "chose_action";
    private static final String GOODBYE_UTTERANCE = "goodbye";
    private static final String CHOICES_UTTERANCE = "choices";

    // Delays, in milliseconds.
    private static final int FIRST_UTTERANCE_DELAY = 300;
    private static final int MAX_TTS_DELAY = 6000;
    private static final int EXIT_DELAY = 2000;

    private static final int BLUETOOTH_SAMPLE_RATE = 8000;
    private static final int REGULAR_SAMPLE_RATE = 11025;

    // TTS over bluetooth is attenuated by this many dB; the stream volume
    // range is taken to span roughly STREAM_VOLUME_RANGE_DB.
    private static final int TTS_ATTENUATION_DB = 18;
    private static final int STREAM_VOLUME_RANGE_DB = 50;

    // Values of mState; see the class comment for the transitions.
    private static final int INITIALIZING = 0;
    private static final int SPEAKING_GREETING = 1;
    private static final int WAITING_FOR_COMMAND = 2;
    private static final int SPEAKING_TRY_AGAIN = 3;
    private static final int SPEAKING_CHOICES = 4;
    private static final int WAITING_FOR_CHOICE = 5;
    private static final int WAITING_FOR_DIALOG_CHOICE = 6;
    private static final int SPEAKING_CHOSEN_ACTION = 7;
    private static final int SPEAKING_GOODBYE = 8;
    private static final int EXITING = 9;

    private static final CommandRecognizerEngine mCommandEngine =
            new CommandRecognizerEngine();
    private static final PhoneTypeChoiceRecognizerEngine mPhoneTypeChoiceEngine =
            new PhoneTypeChoiceRecognizerEngine();
    private CommandRecognizerClient mCommandClient;
    private ChoiceRecognizerClient mChoiceClient;
    private ToneGenerator mToneGenerator;
    private Handler mHandler;
    private Thread mRecognizerThread = null;
    private AudioManager mAudioManager;
    private BluetoothHeadset mBluetoothHeadset;
    private BluetoothDevice mBluetoothDevice;
    private BluetoothAdapter mAdapter;
    private TextToSpeech mTts;
    private HashMap<String, String> mTtsParams;
    private VoiceDialerBroadcastReceiver mReceiver;
    private boolean mWaitingForTts;
    private boolean mWaitingForScoConnection;
    private Intent[] mAvailableChoices;
    private Intent mChosenAction;
    private int mBluetoothVoiceVolume;
    // True once mBluetoothVoiceVolume holds the user's original SCO volume,
    // so that onDestroy() only restores a value that was really captured.
    private boolean mBluetoothVoiceVolumeSaved;
    private int mState;
    private AlertDialog mAlertDialog;
    private Runnable mFallbackRunnable;
    private boolean mUsingBluetooth = false;
    private int mSampleRate;
    private WakeLock mWakeLock;

    /**
     * Sets up the window, audio focus, wake lock, broadcast receiver and the
     * recognizer clients, then either requests the Bluetooth headset proxy or
     * starts listening on the regular microphone right away.
     */
    @Override
    protected void onCreate(Bundle icicle) {
        super.onCreate(icicle);

        // TODO: All of this state management and holding of
        // connections to the TTS engine and recognizer really
        // belongs in a service.  The activity can be stopped or deleted
        // and recreated for lots of reasons.
        // It's way too late in the ICS release cycle for a change
        // like this now though.
        // MHibdon Sept 20 2011
        mHandler = new Handler();
        mAudioManager = (AudioManager)getSystemService(AUDIO_SERVICE);
        mToneGenerator = new ToneGenerator(AudioManager.STREAM_RING,
                ToneGenerator.MAX_VOLUME);

        acquireWakeLock(this);

        mState = INITIALIZING;
        mChosenAction = null;
        mAudioManager.requestAudioFocus(
                null, AudioManager.STREAM_MUSIC,
                AudioManager.AUDIOFOCUS_GAIN_TRANSIENT);

        // set this flag so this activity will stay in front of the keyguard
        int flags = WindowManager.LayoutParams.FLAG_SHOW_WHEN_LOCKED;
        getWindow().addFlags(flags);

        // open main window
        setTheme(android.R.style.Theme_Dialog);
        setTitle(R.string.title);
        setContentView(R.layout.voice_dialing);
        findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
        findViewById(R.id.retry_view).setVisibility(View.INVISIBLE);
        findViewById(R.id.microphone_loading_view).setVisibility(View.VISIBLE);

        if (RecognizerLogger.isEnabled(this)) {
            ((TextView) findViewById(R.id.substate)).setText(R.string.logging_enabled);
        }

        // Listen for headset connection and SCO audio state changes.
        IntentFilter audioStateFilter = new IntentFilter();
        audioStateFilter.addAction(BluetoothHeadset.ACTION_CONNECTION_STATE_CHANGED);
        audioStateFilter.addAction(BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED);
        mReceiver = new VoiceDialerBroadcastReceiver();
        registerReceiver(mReceiver, audioStateFilter);

        mCommandEngine.setContactsFile(newFile(getArg(CONTACTS_EXTRA)));
        mCommandEngine.setMinimizeResults(true);
        mCommandEngine.setAllowOpenEntries(false);
        mCommandClient = new CommandRecognizerClient();
        mChoiceClient = new ChoiceRecognizerClient();

        mAdapter = BluetoothAdapter.getDefaultAdapter();
        if (BluetoothHeadset.isBluetoothVoiceDialingEnabled(this) && mAdapter != null) {
            // Get handle to BluetoothHeadset object
            if (!mAdapter.getProfileProxy(this, mBluetoothHeadsetServiceListener,
                    BluetoothProfile.HEADSET)) {
                Log.e(TAG, "Getting Headset Proxy failed");
                // The service listener will never be called, so fall back to
                // the regular microphone instead of waiting forever.
                updateBluetoothParameters(false);
            }
        } else {
            if (false) Log.d(TAG, "bluetooth unavailable");
            // we're not using bluetooth apparently, just start listening.
            updateBluetoothParameters(false);
        }
    }

    /**
     * Shows a failure message in the UI. Must run on the UI thread.
     */
    class ErrorRunnable implements Runnable {
        private int mErrorMsg;

        /**
         * @param errorMsg string resource id shown in the substate view.
         */
        public ErrorRunnable(int errorMsg) {
            mErrorMsg = errorMsg;
        }

        public void run() {
            // put up an error and exit
            mHandler.removeCallbacks(mMicFlasher);
            ((TextView)findViewById(R.id.state)).setText(R.string.failure);
            // Show the message this runnable was created with (it used to be
            // overwritten unconditionally by "headset connection lost").
            ((TextView)findViewById(R.id.substate)).setText(mErrorMsg);
            findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
            findViewById(R.id.retry_view).setVisibility(View.VISIBLE);

            if (!mUsingBluetooth) {
                playSound(ToneGenerator.TONE_PROP_NACK);
            }
        }
    }

    /**
     * Advances the state machine after a TTS utterance finished, either
     * because the engine reported completion or because the fallback timer
     * fired.
     */
    class OnTtsCompletionRunnable implements Runnable {
        private boolean mFallback;

        /**
         * @param fallback true when this instance is the timeout fallback
         *        rather than a real completion callback.
         */
        OnTtsCompletionRunnable(boolean fallback) {
            mFallback = fallback;
        }

        public void run() {
            if (mFallback) {
                Log.e(TAG, "utterance completion not delivered, using fallback");
            }
            Log.d(TAG, "onTtsCompletionRunnable");
            switch (mState) {
                case SPEAKING_GREETING:
                case SPEAKING_TRY_AGAIN:
                    listenForCommand();
                    break;
                case SPEAKING_CHOICES:
                    listenForChoice();
                    break;
                case SPEAKING_GOODBYE:
                    mState = EXITING;
                    finish();
                    break;
                case SPEAKING_CHOSEN_ACTION:
                    mState = EXITING;
                    startActivityHelp(mChosenAction);
                    finish();
                    break;
                default:
                    // not speaking anything we need to react to
                    break;
            }
        }
    }

    /**
     * Speaks the "speak now" greeting and arms the completion fallback.
     */
    class GreetingRunnable implements Runnable {
        public void run() {
            mState = SPEAKING_GREETING;
            mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
                    SPEAK_NOW_UTTERANCE);
            mTts.speak(getString(R.string.speak_now_tts),
                    TextToSpeech.QUEUE_FLUSH,
                    mTtsParams);
            // Normally, we will begin listening for the command after the
            // utterance completes.  As a fallback in case the utterance
            // does not complete, post a delayed runnable to start
            // listening anyway.
            mFallbackRunnable = new OnTtsCompletionRunnable(true);
            mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
        }
    }

    /**
     * Receives the result of TTS initialization, limits the SCO stream volume
     * and starts the greeting once the SCO connection is also available.
     */
    class TtsInitListener implements TextToSpeech.OnInitListener {
        public void onInit(int status) {
            // status can be either TextToSpeech.SUCCESS or TextToSpeech.ERROR.
            if (false) Log.d(TAG, "onInit for tts");
            if (status != TextToSpeech.SUCCESS) {
                // Initialization failed.
                Log.e(TAG, "Could not initialize TextToSpeech.");
                mHandler.post(new ErrorRunnable(R.string.recognition_error));
                exitActivity();
                return;
            }

            if (mTts == null) {
                Log.e(TAG, "null tts");
                mHandler.post(new ErrorRunnable(R.string.recognition_error));
                exitActivity();
                return;
            }

            mTts.setOnUtteranceCompletedListener(new OnUtteranceCompletedListener());

            // The TTS engine has been successfully initialized.
            mWaitingForTts = false;

            // TTS over bluetooth is really loud,
            // Limit volume to -18dB. Stream volume range represents approximately 50dB
            // (See AudioSystem.cpp linearToLog()) so the number of steps corresponding
            // to 18dB is 18 / (50 / maxSteps).
            if (!mBluetoothVoiceVolumeSaved) {
                // Only remember the volume once, so a later re-initialization
                // cannot replace the user's level with our limited one.
                mBluetoothVoiceVolume = mAudioManager.getStreamVolume(
                        AudioManager.STREAM_BLUETOOTH_SCO);
                mBluetoothVoiceVolumeSaved = true;
            }
            int maxVolume = mAudioManager.getStreamMaxVolume(AudioManager.STREAM_BLUETOOTH_SCO);
            // Multiply before dividing: the literal form 18 / (50 / maxVolume)
            // truncates badly and divides by zero when maxVolume is 0 or > 50.
            int attenuationSteps = (TTS_ATTENUATION_DB * maxVolume) / STREAM_VOLUME_RANGE_DB + 1;
            int volume = Math.max(0, maxVolume - attenuationSteps);
            if (mBluetoothVoiceVolume > volume) {
                mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO, volume, 0);
            }

            if (!mWaitingForScoConnection) {
                // we now have SCO connection and TTS, so we can start.
                mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY);
            }
            // otherwise the bluetooth connection is not up yet, still waiting.
        }
    }

    /**
     * Forwards TTS utterance completion to the UI thread.
     */
    class OnUtteranceCompletedListener
            implements TextToSpeech.OnUtteranceCompletedListener {
        public void onUtteranceCompleted(String utteranceId) {
            if (false) Log.d(TAG, "onUtteranceCompleted " + utteranceId);
            // since the utterance has completed, we no longer need the fallback.
            mHandler.removeCallbacks(mFallbackRunnable);
            mFallbackRunnable = null;
            mHandler.post(new OnTtsCompletionRunnable(false));
        }
    }

    /**
     * Configures the recognizer for Bluetooth or regular microphone input.
     *
     * @param connected true to use the connected headset (starts headset
     *        voice recognition and TTS, then waits for SCO and TTS), false to
     *        use the regular microphone and start listening immediately.
     */
    private void updateBluetoothParameters(boolean connected) {
        if (connected) {
            if (mBluetoothHeadset == null) {
                // The connection broadcast can arrive before (or without) the
                // headset profile proxy; there is nothing to drive yet.
                Log.w(TAG, "headset connected but no headset proxy, ignoring");
                return;
            }
            if (false) Log.d(TAG, "using bluetooth");
            mUsingBluetooth = true;

            mBluetoothHeadset.startVoiceRecognition(mBluetoothDevice);

            mSampleRate = BLUETOOTH_SAMPLE_RATE;
            mCommandEngine.setMinimizeResults(true);
            mCommandEngine.setAllowOpenEntries(false);

            // we can't start recognizing until we get connected to the BluetoothHeadset
            // and have a connected audio state.  We will listen for these
            // states to change.
            mWaitingForScoConnection = true;

            // initialize the text to speech system
            mWaitingForTts = true;
            mTts = new TextToSpeech(VoiceDialerActivity.this, new TtsInitListener());
            mTtsParams = new HashMap<String, String>();
            mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_STREAM,
                    String.valueOf(AudioManager.STREAM_VOICE_CALL));
            // we need to wait for the TTS system and the SCO connection
            // before we can start listening.
        } else {
            if (false) Log.d(TAG, "not using bluetooth");
            mUsingBluetooth = false;
            mSampleRate = REGULAR_SAMPLE_RATE;
            mCommandEngine.setMinimizeResults(false);
            mCommandEngine.setAllowOpenEntries(true);

            // we're not using bluetooth apparently, just start listening.
            listenForCommand();
        }
    }

    /**
     * Receives the headset profile proxy and picks the first connected device.
     */
    private BluetoothProfile.ServiceListener mBluetoothHeadsetServiceListener =
            new BluetoothProfile.ServiceListener() {
        public void onServiceConnected(int profile, BluetoothProfile proxy) {
            if (false) Log.d(TAG, "onServiceConnected");
            mBluetoothHeadset = (BluetoothHeadset) proxy;

            List<BluetoothDevice> deviceList = mBluetoothHeadset.getConnectedDevices();

            if (!deviceList.isEmpty()) {
                mBluetoothDevice = deviceList.get(0);
                int state = mBluetoothHeadset.getConnectionState(mBluetoothDevice);

                if (false) Log.d(TAG, "headset status " + state);

                // We are already connected to a headset
                if (state == BluetoothHeadset.STATE_CONNECTED) {
                    updateBluetoothParameters(true);
                    return;
                }
            }
            updateBluetoothParameters(false);
        }

        public void onServiceDisconnected(int profile) {
            mBluetoothHeadset = null;
        }
    };

    /**
     * Tracks headset connection and SCO audio state broadcasts.
     */
    private class VoiceDialerBroadcastReceiver extends BroadcastReceiver {
        @Override
        public void onReceive(Context context, Intent intent) {
            // Constant-first comparisons so a null action is simply ignored.
            String action = intent.getAction();
            if (BluetoothHeadset.ACTION_CONNECTION_STATE_CHANGED.equals(action)) {
                handleConnectionStateChanged(intent);
            } else if (BluetoothHeadset.ACTION_AUDIO_STATE_CHANGED.equals(action)) {
                handleAudioStateChanged(intent);
            }
        }

        /** Reacts to the headset becoming connected or disconnected. */
        private void handleConnectionStateChanged(Intent intent) {
            BluetoothDevice device =
                    intent.getParcelableExtra(BluetoothDevice.EXTRA_DEVICE);
            int state = intent.getIntExtra(BluetoothProfile.EXTRA_STATE, -1);

            if (false) Log.d(TAG, "HEADSET STATE -> " + state);

            if (state == BluetoothProfile.STATE_CONNECTED) {
                if (device == null) {
                    return;
                }
                mBluetoothDevice = device;
                updateBluetoothParameters(true);
            } else if (state == BluetoothProfile.STATE_DISCONNECTED) {
                mBluetoothDevice = null;
                updateBluetoothParameters(false);
            }
        }

        /** Reacts to the SCO audio channel coming up or going away. */
        private void handleAudioStateChanged(Intent intent) {
            int state = intent.getIntExtra(BluetoothProfile.EXTRA_STATE, -1);
            int prevState = intent.getIntExtra(BluetoothProfile.EXTRA_PREVIOUS_STATE, -1);

            if (state == BluetoothHeadset.STATE_AUDIO_CONNECTED
                    && mWaitingForScoConnection) {
                // SCO channel has just become available.
                mWaitingForScoConnection = false;
                if (!mWaitingForTts) {
                    // we now have SCO connection and TTS, so we can start.
                    mHandler.postDelayed(new GreetingRunnable(), FIRST_UTTERANCE_DELAY);
                }
                // otherwise still waiting for the TTS to be set up.
            } else if (prevState == BluetoothHeadset.STATE_AUDIO_CONNECTED) {
                if (!mWaitingForScoConnection) {
                    // apparently our connection to the headset has dropped.
                    // we won't be able to continue voicedialing.
                    if (false) Log.d(TAG, "lost sco connection");

                    mHandler.post(new ErrorRunnable(
                            R.string.headset_connection_lost));

                    exitActivity();
                }
            }
        }
    }

    /**
     * Tells the user (on screen, and by TTS or tone) that nothing was
     * recognized, then listens for a command again.
     */
    private void askToTryAgain() {
        // get work off UAPI thread
        mHandler.post(new Runnable() {
            public void run() {
                if (mAlertDialog != null) {
                    mAlertDialog.dismiss();
                }

                mHandler.removeCallbacks(mMicFlasher);
                ((TextView)findViewById(R.id.state)).setText(R.string.please_try_again);
                findViewById(R.id.state).setVisibility(View.VISIBLE);
                findViewById(R.id.microphone_view).setVisibility(View.INVISIBLE);
                findViewById(R.id.retry_view).setVisibility(View.VISIBLE);

                if (mUsingBluetooth) {
                    mState = SPEAKING_TRY_AGAIN;
                    mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
                            TRY_AGAIN_UTTERANCE);
                    mTts.speak(getString(R.string.no_results_tts),
                            TextToSpeech.QUEUE_FLUSH,
                            mTtsParams);

                    // Normally, we will start listening after the
                    // utterance completes.  As a fallback in case the utterance
                    // does not complete, post a delayed runnable to start
                    // listening anyway.
                    mFallbackRunnable = new OnTtsCompletionRunnable(true);
                    mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
                } else {
                    try {
                        // let the tone (and vibration) finish before listening
                        Thread.sleep(playSound(ToneGenerator.TONE_PROP_NACK));
                    } catch (InterruptedException e) {
                        // keep the interrupt visible to whoever owns this thread
                        Thread.currentThread().interrupt();
                    }
                    // we are not using tts, so we just start listening again.
                    listenForCommand();
                }
            }
        });
    }

    /**
     * Carries out mChosenAction: over Bluetooth the sentence is spoken first
     * and the intent dispatched on completion; otherwise it is dispatched now.
     */
    private void performChoice() {
        if (mUsingBluetooth) {
            String sentenceSpoken = spaceOutDigits(
                    mChosenAction.getStringExtra(
                        RecognizerEngine.SENTENCE_EXTRA));

            mState = SPEAKING_CHOSEN_ACTION;
            mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
                    CHOSEN_ACTION_UTTERANCE);
            mTts.speak(sentenceSpoken,
                    TextToSpeech.QUEUE_FLUSH,
                    mTtsParams);

            // Normally, the intent will be dispatched after the
            // utterance completes.  As a fallback in case the utterance
            // does not complete, post a delayed runnable to fire
            // the intent.
            mFallbackRunnable = new OnTtsCompletionRunnable(true);
            mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
        } else {
            // just dispatch the intent
            startActivityHelp(mChosenAction);
            finish();
        }
    }

    /**
     * Waits for the user to pick one of mAvailableChoices, by voice over
     * Bluetooth or through the alert dialog otherwise.
     */
    private void waitForChoice() {
        if (mUsingBluetooth) {
            // We are running in bluetooth mode, and we have
            // multiple matches.  Speak the choices and let
            // the user choose.

            // We will not start listening until the utterance
            // of the choice list completes.
            speakChoices();

            // Normally, listening will begin after the
            // utterance completes.  As a fallback in case the utterance
            // does not complete, post a delayed runnable to begin
            // listening.
            mFallbackRunnable = new OnTtsCompletionRunnable(true);
            mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
        } else {
            // We are not running in bluetooth mode, so all
            // we need to do is wait for the user to select
            // a choice from the alert dialog.  We will wait
            // indefinitely for this.
            mState = WAITING_FOR_DIALOG_CHOICE;
        }
    }

    /**
     * Receives callbacks from the {@link CommandRecognizerEngine}.
     */
    private class CommandRecognizerClient implements RecognizerClient {
        // Ring volume at or above which the start beep is skipped in the input.
        static final int MIN_VOLUME_TO_SKIP = 2;

        /**
         * Called by the {@link RecognizerEngine} when the microphone is started.
         */
        public void onMicrophoneStart(InputStream mic) {
            if (false) Log.d(TAG, "onMicrophoneStart");

            if (!mUsingBluetooth) {
                playSound(ToneGenerator.TONE_PROP_BEEP);

                int ringVolume = mAudioManager.getStreamVolume(
                        AudioManager.STREAM_RING);
                Log.d(TAG, "ringVolume " + ringVolume);

                if (ringVolume >= MIN_VOLUME_TO_SKIP) {
                    // now we're playing a sound, and corrupting the input sample.
                    // So we need to pull that junk off of the input stream so that the
                    // recognizer won't see it.
                    try {
                        skipBeep(mic);
                    } catch (java.io.IOException e) {
                        Log.e(TAG, "IOException " + e);
                    }
                } else {
                    if (false) Log.d(TAG, "no tone");
                }
            }

            mHandler.post(new Runnable() {
                public void run() {
                    findViewById(R.id.retry_view).setVisibility(View.INVISIBLE);
                    findViewById(R.id.microphone_loading_view).setVisibility(
                            View.INVISIBLE);
                    ((TextView)findViewById(R.id.state)).setText(R.string.listening);
                    mHandler.post(mMicFlasher);
                }
            });
        }

        /**
         *  Beep detection
         */
        private static final int START_WINDOW_MS = 500;  // Beep detection window duration in ms
        private static final int SINE_FREQ = 400;        // base sine frequency on beep
        private static final int NUM_PERIODS_BLOCK = 10; // number of sine periods in one energy averaging block
        private static final int THRESHOLD = 8;          // absolute pseudo energy threshold
        private static final int START = 0;              // beep detection start
        private static final int RISING = 1;             // beep rising edge start
        private static final int TOP = 2;                // beep constant energy detected

        /**
         * Consumes input until the end of the start beep is detected or the
         * detection window has been read.
         *
         * The stream is read in fixed-size blocks of 16-bit little-endian
         * samples; a pseudo energy is computed per block and fed to a small
         * START -> RISING -> TOP state machine.
         *
         * @param is microphone stream; nothing is read when it is null.
         * @throws IOException if the stream ends or fails while reading.
         */
        void skipBeep(InputStream is) throws IOException {
            int sampleCount = ((mSampleRate / SINE_FREQ) * NUM_PERIODS_BLOCK);
            int blockSize = 2 * sampleCount; // energy averaging block

            if (is == null || blockSize == 0) {
                return;
            }

            byte[] buf = new byte[blockSize];
            // window size in bytes, rounded up to a whole number of blocks
            int maxBytes = 2 * ((START_WINDOW_MS * mSampleRate) / 1000);
            maxBytes = ((maxBytes-1) / blockSize + 1) * blockSize;

            int count = 0;
            int state = START;  // detection state
            long prevE = 0; // previous pseudo energy
            long peak = 0;
            int threshold = THRESHOLD*sampleCount;  // absolute energy threshold
            Log.d(TAG, "blockSize " + blockSize);

            while (count < maxBytes) {
                // fill one complete block
                int cnt = 0;
                while (cnt < blockSize) {
                    int n = is.read(buf, cnt, blockSize-cnt);
                    if (n < 0) {
                        throw new java.io.IOException(
                                "microphone stream ended during beep detection");
                    }
                    cnt += n;
                }

                // compute pseudo energy
                cnt = blockSize;
                long sumx = 0;
                long sumxx = 0;
                while (cnt >= 2) {
                    short smp = (short)((buf[cnt - 1] << 8) + (buf[cnt - 2] & 0xFF));
                    sumx += smp;
                    sumxx += smp*smp;
                    cnt -= 2;
                }
                long energy = (sumxx*sampleCount - sumx*sumx)/(sampleCount*sampleCount);
                Log.d(TAG, "sumx " + sumx + " sumxx " + sumxx + " ee " + energy);

                switch (state) {
                    case START:
                        if (energy > threshold && energy > (prevE * 2) && prevE != 0) {
                            // rising edge if energy doubled and > abs threshold
                            state = RISING;
                            if (false) Log.d(TAG, "start RISING: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
                        }
                        break;
                    case RISING:
                        if (energy < threshold || energy < (prevE / 2)){
                            // energy fell back below half of previous, back to start
                            if (false) Log.d(TAG, "back to START: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
                            peak = 0;
                            state = START;
                        } else if (energy > (prevE / 2) && energy < (prevE * 2)) {
                            // Start of constant energy
                            if (false) Log.d(TAG, "start TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
                            if (peak < energy) {
                                peak = energy;
                            }
                            state = TOP;
                        }
                        break;
                    case TOP:
                        if (energy < threshold || energy < (peak / 2)) {
                            // e went to less than half of the peak
                            if (false) Log.d(TAG, "end TOP: " + count +" time: "+ (((1000*count)/2)/mSampleRate));
                            return;
                        }
                        break;
                }
                prevE = energy;
                count += blockSize;
            }
            if (false) Log.d(TAG, "no beep detected, timed out");
        }

        /**
         * Called by the {@link RecognizerEngine} if the recognizer fails.
         */
        public void onRecognitionFailure(final String msg) {
            if (false) Log.d(TAG, "onRecognitionFailure " + msg);
            // we had zero results.  Just try again.
            askToTryAgain();
        }

        /**
         * Called by the {@link RecognizerEngine} on an internal error.
         */
        public void onRecognitionError(final String msg) {
            if (false) Log.d(TAG, "onRecognitionError " + msg);
            mHandler.post(new ErrorRunnable(R.string.recognition_error));
            exitActivity();
        }

        /**
         * Called by the {@link RecognizerEngine} when it succeeds.  Over
         * Bluetooth a single (or non-call) result is spoken and dispatched
         * directly; otherwise an AlertDialog is displayed and the user is
         * prompted to select.
         * @param intents a list of Intents corresponding to the sentences.
         */
        public void onRecognitionSuccess(final Intent[] intents) {
            if (false) Log.d(TAG, "CommandRecognizerClient onRecognitionSuccess " +
                    intents.length);
            if (mState != WAITING_FOR_COMMAND) {
                if (false) Log.d(TAG, "not waiting for command, ignoring");
                return;
            }

            // store the intents in a member variable so that we can access it
            // later when the user chooses which action to perform.
            mAvailableChoices = intents;

            mHandler.post(new Runnable() {
                public void run() {
                    if (!mUsingBluetooth) {
                        playSound(ToneGenerator.TONE_PROP_ACK);
                    }
                    mHandler.removeCallbacks(mMicFlasher);

                    if (intents.length == 0) {
                        onRecognitionFailure("zero intents");
                        return;
                    }

                    // see if the response was "exit" or "cancel".
                    String value = intents[0].getStringExtra(
                        RecognizerEngine.SEMANTIC_EXTRA);
                    if (false) Log.d(TAG, "value " + value);
                    if ("X".equals(value)) {
                        exitActivity();
                        return;
                    }

                    boolean singleOrNonCall = intents.length == 1
                            || !Intent.ACTION_CALL_PRIVILEGED.equals(
                                    intents[0].getAction());
                    if (mUsingBluetooth && singleOrNonCall) {
                        // When we're running in bluetooth mode, we expect
                        // that the user is not looking at the screen and cannot
                        // interact with the device in any way besides voice
                        // commands.  In this case we need to minimize how many
                        // interactions the user has to perform in order to call
                        // someone.
                        // So if there is only one match, instead of making the
                        // user confirm, we just assume it's correct, speak
                        // the choice over TTS, and then dispatch it.
                        // If there are multiple matches for some intent type
                        // besides "call", it's too difficult for the user to
                        // explain which one they meant, so we just take the highest
                        // confidence match and dispatch that.

                        // Speak the sentence for the action we are about
                        // to dispatch so that the user knows what is happening.
                        mChosenAction = intents[0];
                        performChoice();
                        return;
                    }

                    // Either we are not running in bluetooth mode,
                    // or we had multiple matches.  Either way, we need
                    // the user to confirm the choice.
                    // Put up a dialog from which the user can select
                    // his/her choice.
                    String[] sentences = new String[intents.length];
                    for (int i = 0; i < intents.length; i++) {
                        sentences[i] = intents[i].getStringExtra(
                                RecognizerEngine.SENTENCE_EXTRA);
                    }

                    DialogInterface.OnCancelListener cancelListener =
                        new DialogInterface.OnCancelListener() {

                        public void onCancel(DialogInterface dialog) {
                            if (false) {
                                Log.d(TAG, "cancelListener.onCancel");
                            }
                            dialog.dismiss();
                            finish();
                        }
                    };

                    DialogInterface.OnClickListener clickListener =
                        new DialogInterface.OnClickListener() {

                        public void onClick(DialogInterface dialog, int which) {
                            if (false) {
                                Log.d(TAG, "clickListener.onClick " + which);
                            }
                            startActivityHelp(intents[which]);
                            dialog.dismiss();
                            finish();
                        }
                    };

                    DialogInterface.OnClickListener negativeListener =
                        new DialogInterface.OnClickListener() {

                        public void onClick(DialogInterface dialog, int which) {
                            if (false) {
                                Log.d(TAG, "negativeListener.onClick " +
                                    which);
                            }
                            dialog.dismiss();
                            finish();
                        }
                    };

                    mAlertDialog =
                            new AlertDialog.Builder(VoiceDialerActivity.this,
                                    AlertDialog.THEME_HOLO_DARK)
                            .setTitle(R.string.title)
                            .setItems(sentences, clickListener)
                            .setOnCancelListener(cancelListener)
                            .setNegativeButton(android.R.string.cancel,
                                    negativeListener)
                            .show();

                    waitForChoice();
                }
            });
        }
    }

    /**
     * Receives callbacks from the {@link PhoneTypeChoiceRecognizerEngine}
     * while the user picks one of several matches by voice.
     */
    private class ChoiceRecognizerClient implements RecognizerClient {
        /**
         * Handles the spoken choice: "R" restarts command recognition, "X"
         * exits, anything else is matched against the phone type of the
         * available choices.
         */
        public void onRecognitionSuccess(final Intent[] intents) {
            if (false) Log.d(TAG, "ChoiceRecognizerClient onRecognitionSuccess");
            if (mState != WAITING_FOR_CHOICE) {
                if (false) Log.d(TAG, "not waiting for choice, ignoring");
                return;
            }

            if (mAlertDialog != null) {
                mAlertDialog.dismiss();
            }

            if (intents.length == 0) {
                // Same treatment as CommandRecognizerClient: an empty result
                // is a failure, not a reason to stay in this state forever.
                onRecognitionFailure("zero intents");
                return;
            }

            // disregard all but the first intent.
            String value = intents[0].getStringExtra(
                RecognizerEngine.SEMANTIC_EXTRA);
            if (false) Log.d(TAG, "value " + value);
            if ("R".equals(value)) {
                if (mUsingBluetooth) {
                    mHandler.post(new GreetingRunnable());
                } else {
                    listenForCommand();
                }
            } else if ("X".equals(value)) {
                exitActivity();
            } else {
                // it's a phone type response
                mChosenAction = null;
                for (int i = 0; i < mAvailableChoices.length; i++) {
                    String phoneType = mAvailableChoices[i].getStringExtra(
                            CommandRecognizerEngine.PHONE_TYPE_EXTRA);
                    // a missing semantic value can never match a choice
                    if (value != null && value.equalsIgnoreCase(phoneType)) {
                        mChosenAction = mAvailableChoices[i];
                    }
                }

                if (mChosenAction != null) {
                    performChoice();
                } else {
                    // invalid choice
                    if (false) Log.d(TAG, "invalid choice" + value);

                    if (mUsingBluetooth) {
                        mTtsParams.remove(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID);
                        mTts.speak(getString(R.string.invalid_choice_tts),
                            TextToSpeech.QUEUE_FLUSH,
                            mTtsParams);
                    }
                    waitForChoice();
                }
            }
        }

        /** Called when nothing was recognized; says goodbye and exits. */
        public void onRecognitionFailure(String msg) {
            if (false) Log.d(TAG, "ChoiceRecognizerClient onRecognitionFailure");
            exitActivity();
        }

        /** Called on an internal recognizer error; shows it and exits. */
        public void onRecognitionError(String err) {
            if (false) Log.d(TAG, "ChoiceRecognizerClient onRecognitionError");
            mHandler.post(new ErrorRunnable(R.string.recognition_error));
            exitActivity();
        }

        /** Called when the microphone is started; nothing to do here. */
        public void onMicrophoneStart(InputStream mic) {
            if (false) Log.d(TAG, "ChoiceRecognizerClient onMicrophoneStart");
        }
    }

    /**
     * Queues a TTS utterance listing mAvailableChoices and enters the
     * SPEAKING_CHOICES state.
     */
    private void speakChoices() {
        if (false) Log.d(TAG, "speakChoices");
        mState = SPEAKING_CHOICES;

        String sentenceSpoken = spaceOutDigits(
                mAvailableChoices[0].getStringExtra(
                    RecognizerEngine.SENTENCE_EXTRA));

        // When we have multiple choices, they will be of the form
        // "call jack jones at home", "call jack jones on mobile".
        // Speak the entire first sentence, then the last word from each
        // of the remaining sentences.  This will come out to something
        // like "call jack jones at home mobile or work".
        StringBuilder builder = new StringBuilder();
        builder.append(sentenceSpoken);

        int count = mAvailableChoices.length;
        for (int i = 1; i < count; i++) {
            builder.append(i == count - 1 ? " or " : " ");
            String tmpSentence = mAvailableChoices[i].getStringExtra(
                    RecognizerEngine.SENTENCE_EXTRA);
            String[] words = tmpSentence.trim().split(" ");
            builder.append(words[words.length-1]);
        }
        mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
                CHOICES_UTTERANCE);
        mTts.speak(builder.toString(),
            TextToSpeech.QUEUE_ADD,
            mTtsParams);
    }

    /**
     * Rewrites digit runs so the TTS engine reads them digit by digit.
     *
     * @param sentenceDisplay the displayable sentence, e.g. "dial 123 456 7890".
     * @return the sentence to speak, e.g. "dial 1 2 3, 4 5 6, 7 8 9 0".
     */
    private static String spaceOutDigits(String sentenceDisplay) {
        // if we have a sentence of the form "dial 123 456 7890",
        // we need to insert a space between each digit, otherwise
        // the TTS engine will say "dial one hundred twenty three...."
        // When there already is a space, we also insert a comma,
        // so that it pauses between sections.  For the displayable
        // sentence "dial 123 456 7890" it will speak
        // "dial 1 2 3, 4 5 6, 7 8 9 0"
        char buffer[] = sentenceDisplay.toCharArray();
        StringBuilder builder = new StringBuilder();
        boolean buildingNumber = false;
        int l = sentenceDisplay.length();
        for (int index = 0; index < l; index++) {
            char c = buffer[index];
            if (Character.isDigit(c)) {
                if (buildingNumber) {
                    builder.append(" ");
                }
                buildingNumber = true;
                builder.append(c);
            } else if (c == ' ') {
                // a space inside a number becomes a pause; buildingNumber is
                // left set so the next digit still gets its leading space
                builder.append(buildingNumber ? "," : " ");
            } else {
                buildingNumber = false;
                builder.append(c);
            }
        }
        return builder.toString();
    }

    /** Dispatches the given intent. */
    private void startActivityHelp(Intent intent) {
        startActivity(intent);
    }

    /**
     * Enters WAITING_FOR_COMMAND and runs the command recognizer on a new
     * thread.
     */
    private void listenForCommand() {
        if (false) Log.d(TAG, ""
                + "Command(): MICROPHONE_EXTRA: "+getArg(MICROPHONE_EXTRA)+
                ", CONTACTS_EXTRA: "+getArg(CONTACTS_EXTRA));

        mState = WAITING_FOR_COMMAND;
        mRecognizerThread = new Thread() {
            public void run() {
                mCommandEngine.recognize(mCommandClient,
                        VoiceDialerActivity.this,
                        newFile(getArg(MICROPHONE_EXTRA)),
                        mSampleRate);
            }
        };
        mRecognizerThread.start();
    }

    /**
     * Enters WAITING_FOR_CHOICE and runs the phone type choice recognizer on
     * a new thread.
     */
    private void listenForChoice() {
        if (false) Log.d(TAG, "listenForChoice(): MICROPHONE_EXTRA: " +
                getArg(MICROPHONE_EXTRA));

        mState = WAITING_FOR_CHOICE;
        mRecognizerThread = new Thread() {
            public void run() {
                mPhoneTypeChoiceEngine.recognize(mChoiceClient,
                        VoiceDialerActivity.this,
                        newFile(getArg(MICROPHONE_EXTRA)), mSampleRate);
            }
        };
        mRecognizerThread.start();
    }

    /**
     * Starts leaving the activity: speaks "goodbye" when TTS over Bluetooth
     * is available, otherwise finishes after a short delay.
     */
    private void exitActivity() {
        synchronized(this) {
            if (mState != EXITING) {
                if (false) Log.d(TAG, "exitActivity");
                mState = SPEAKING_GOODBYE;
                // mTts can be null here: TtsInitListener calls this method
                // precisely when it finds no TTS instance.
                if (mUsingBluetooth && mTts != null) {
                    mTtsParams.put(TextToSpeech.Engine.KEY_PARAM_UTTERANCE_ID,
                            GOODBYE_UTTERANCE);
                    mTts.speak(getString(R.string.goodbye_tts),
                        TextToSpeech.QUEUE_FLUSH,
                        mTtsParams);
                    // Normally, the activity will finish() after the
                    // utterance completes.  As a fallback in case the utterance
                    // does not complete, post a delayed runnable to finish the
                    // activity.
                    mFallbackRunnable = new OnTtsCompletionRunnable(true);
                    mHandler.postDelayed(mFallbackRunnable, MAX_TTS_DELAY);
                } else {
                    mHandler.postDelayed(new Runnable() {
                        public void run() {
                            finish();
                        }
                    }, EXIT_DELAY);
                }
            }
        }
    }

    /**
     * Looks up an argument, first as a string extra of the launching intent
     * and then as the system property "app.voicedialer." + name.
     *
     * @return the value, or null when name is null or no non-empty value exists.
     */
    private String getArg(String name) {
        if (name == null) return null;
        String arg = getIntent().getStringExtra(name);
        if (arg != null) return arg;
        arg = SystemProperties.get("app.voicedialer." + name);
        return arg != null && arg.length() > 0 ? arg : null;
    }

    /** Returns a File for the given path, or null when the path is null. */
    private static File newFile(String name) {
        return name != null ? new File(name) : null;
    }

    /**
     * Plays a tone and, if the ringer is set to vibrate, vibrates.
     *
     * @param toneType a ToneGenerator tone type.
     * @return how long, in milliseconds, the caller should wait for the
     *         prompt to finish.
     */
    private int playSound(int toneType) {
        int msecDelay = 1;

        // use the ToneGenerator to prompt the user
        if (mToneGenerator != null) {
            mToneGenerator.startTone(toneType);
            msecDelay = StrictMath.max(msecDelay, 300);
        }
        // use the Vibrator to prompt the user
        if (mAudioManager != null &&
                mAudioManager.shouldVibrate(AudioManager.VIBRATE_TYPE_RINGER)) {
            final int VIBRATOR_TIME = 150;
            final int VIBRATOR_GUARD_TIME = 150;
            Vibrator vibrator = new Vibrator();
            vibrator.vibrate(VIBRATOR_TIME);
            msecDelay = StrictMath.max(msecDelay,
                    VIBRATOR_TIME + VIBRATOR_GUARD_TIME);
        }

        return msecDelay;
    }

    /**
     * Tears everything down: dialog, audio state, Bluetooth proxy, recognizer
     * thread, tone generator, pending callbacks, TTS, receiver and wake lock.
     */
    @Override
    protected void onDestroy() {
        synchronized(this) {
            mState = EXITING;
        }

        if (mAlertDialog != null) {
            mAlertDialog.dismiss();
        }

        // set the volume back to the level it was before we started, but only
        // if we actually captured that level (TTS was initialized).
        if (mBluetoothVoiceVolumeSaved) {
            mAudioManager.setStreamVolume(AudioManager.STREAM_BLUETOOTH_SCO,
                                          mBluetoothVoiceVolume, 0);
        }
        mAudioManager.abandonAudioFocus(null);

        // shut down bluetooth, if it exists
        if (mBluetoothHeadset != null) {
            mBluetoothHeadset.stopVoiceRecognition(mBluetoothDevice);
            mAdapter.closeProfileProxy(BluetoothProfile.HEADSET, mBluetoothHeadset);
            mBluetoothHeadset = null;
        }

        // shut down recognizer and wait for the thread to complete
        if (mRecognizerThread != null) {
            mRecognizerThread.interrupt();
            try {
                mRecognizerThread.join();
            } catch (InterruptedException e) {
                if (false) Log.d(TAG, "onStop mRecognizerThread.join exception " + e);
                // do not lose the interrupt request
                Thread.currentThread().interrupt();
            }
            mRecognizerThread = null;
        }

        // the recognizer thread is gone, so nobody can play a tone any more
        if (mToneGenerator != null) {
            mToneGenerator.release();
            mToneGenerator = null;
        }

        // clean up UI
        mHandler.removeCallbacks(mMicFlasher);
        mHandler.removeMessages(0);

        if (mTts != null) {
            mTts.stop();
            mTts.shutdown();
            mTts = null;
        }
        unregisterReceiver(mReceiver);

        super.onDestroy();

        releaseWakeLock();
    }

    /** Acquires a partial wake lock unless one is already held. */
    private void acquireWakeLock(Context context) {
        if (mWakeLock == null) {
            PowerManager pm = (PowerManager)context.getSystemService(Context.POWER_SERVICE);
            mWakeLock = pm.newWakeLock(PowerManager.PARTIAL_WAKE_LOCK,
                                       "VoiceDialer");
            mWakeLock.acquire();
        }
    }

    /** Releases the wake lock taken by acquireWakeLock(), if any. */
    private void releaseWakeLock() {
        if (mWakeLock != null) {
            mWakeLock.release();
            mWakeLock = null;
        }
    }

    /**
     * Toggles the microphone and state views every 750 ms while listening.
     */
    private Runnable mMicFlasher = new Runnable() {
        int visible = View.VISIBLE;

        public void run() {
            findViewById(R.id.microphone_view).setVisibility(visible);
            findViewById(R.id.state).setVisibility(visible);
            visible = visible == View.VISIBLE ? View.INVISIBLE : View.VISIBLE;
            mHandler.postDelayed(this, 750);
        }
    };
}