/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package android.speech.tts;

import android.media.AudioFormat;
import android.media.AudioTrack;
import android.text.TextUtils;
import android.util.Log;

import java.util.Iterator;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.atomic.AtomicLong;

class AudioPlaybackHandler {
    private static final String TAG = "TTS.AudioPlaybackHandler";
    private static final boolean DBG_THREADING = false;
    private static final boolean DBG = false;

    private static final int MIN_AUDIO_BUFFER_SIZE = 8192;

    private static final int SYNTHESIS_START = 1;
    private static final int SYNTHESIS_DATA_AVAILABLE = 2;
    private static final int SYNTHESIS_DONE = 3;
    private static final int PLAY_AUDIO = 5;
    private static final int PLAY_SILENCE = 6;
    private static final int SHUTDOWN = -1;

    private static final int DEFAULT_PRIORITY = 1;
    private static final int HIGH_PRIORITY = 0;
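
    // Note on ordering: ListEntry#compareTo (below) sorts entries with the
    // numerically smaller priority first, so HIGH_PRIORITY (0) entries jump
    // ahead of any queued DEFAULT_PRIORITY (1) entries. Only control messages
    // are enqueued at HIGH_PRIORITY: the SYNTHESIS_DONE cleanup added by
    // stop() and the SHUTDOWN sentinel added by quit().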

    private final PriorityBlockingQueue<ListEntry> mQueue =
            new PriorityBlockingQueue<ListEntry>();
    private final Thread mHandlerThread;

    private volatile MessageParams mCurrentParams = null;
    // Used only for bookkeeping and error detection.
    private volatile SynthesisMessageParams mLastSynthesisRequest = null;
    // Used to order incoming messages in our priority queue.
    private final AtomicLong mSequenceIdCtr = new AtomicLong(0);

    AudioPlaybackHandler() {
        mHandlerThread = new Thread(new MessageLoop(), "TTS.AudioPlaybackThread");
    }

    public void start() {
        mHandlerThread.start();
    }

    /**
     * Stops all synthesis for a given {@code token}. If the token is
     * currently being processed, an effort will be made to stop it, but
     * that is not guaranteed.
     *
     * NOTE: This assumes that all other messages in the queue with {@code token}
     * have been removed already.
     *
     * NOTE: Must be called synchronized on {@code AudioPlaybackHandler.this}.
     */
    private void stop(MessageParams token) {
        if (token == null) {
            return;
        }

        if (DBG) Log.d(TAG, "Stopping token : " + token);

        if (token.getType() == MessageParams.TYPE_SYNTHESIS) {
            AudioTrack current = ((SynthesisMessageParams) token).getAudioTrack();
            if (current != null) {
                // Stop the current audio track if it's still playing.
                // The audio track is thread safe in this regard. The current
                // handleSynthesisDataAvailable call will return soon after this
                // call.
                current.stop();
            }
            // This is safe because PlaybackSynthesisCallback#stop would have
            // been called before this method, and will no longer enqueue any
            // audio for this token.
            //
            // (Even if it did, all it would result in is a warning message.)
            mQueue.add(new ListEntry(SYNTHESIS_DONE, token, HIGH_PRIORITY));
        } else if (token.getType() == MessageParams.TYPE_AUDIO) {
            ((AudioMessageParams) token).getPlayer().stop();
            // No cleanup required for audio messages.
        } else if (token.getType() == MessageParams.TYPE_SILENCE) {
            ((SilenceMessageParams) token).getConditionVariable().open();
            // No cleanup required for silence messages.
        }
    }

    // -----------------------------------------------------
    // Methods that add and remove elements from the queue. Strictly
    // speaking, these do not need to be synchronized, but synchronization
    // makes the behaviour a lot more predictable. (It would still be
    // correct without it.)
    // -----------------------------------------------------

    public synchronized void removePlaybackItems(String callingApp) {
        if (DBG_THREADING) Log.d(TAG, "Removing all callback items for : " + callingApp);
        removeMessages(callingApp);

        final MessageParams current = getCurrentParams();
        if (current != null && TextUtils.equals(callingApp, current.getCallingApp())) {
            stop(current);
        }

        final MessageParams lastSynthesis = mLastSynthesisRequest;
        if (lastSynthesis != null && lastSynthesis != current &&
                TextUtils.equals(callingApp, lastSynthesis.getCallingApp())) {
            stop(lastSynthesis);
        }
    }

    public synchronized void removeAllItems() {
        if (DBG_THREADING) Log.d(TAG, "Removing all items");
        removeAllMessages();

        final MessageParams current = getCurrentParams();
        final MessageParams lastSynthesis = mLastSynthesisRequest;
        stop(current);

        if (lastSynthesis != null && lastSynthesis != current) {
            stop(lastSynthesis);
        }
    }

    /**
     * @return {@code true} if the queue is non-empty or a queue item is
     *         currently being handled, {@code false} otherwise.
     */
    public boolean isSpeaking() {
        return (mQueue.peek() != null) || (mCurrentParams != null);
    }

    /**
     * Shut down the audio playback thread.
     */
    public synchronized void quit() {
        removeAllMessages();
        stop(getCurrentParams());
        mQueue.add(new ListEntry(SHUTDOWN, null, HIGH_PRIORITY));
    }

    synchronized void enqueueSynthesisStart(SynthesisMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis start : " + token);
        mQueue.add(new ListEntry(SYNTHESIS_START, token));
    }

    synchronized void enqueueSynthesisDataAvailable(SynthesisMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis data available : " + token);
        mQueue.add(new ListEntry(SYNTHESIS_DATA_AVAILABLE, token));
    }

    synchronized void enqueueSynthesisDone(SynthesisMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing synthesis done : " + token);
        mQueue.add(new ListEntry(SYNTHESIS_DONE, token));
    }

    synchronized void enqueueAudio(AudioMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing audio : " + token);
        mQueue.add(new ListEntry(PLAY_AUDIO, token));
    }

    synchronized void enqueueSilence(SilenceMessageParams token) {
        if (DBG_THREADING) Log.d(TAG, "Enqueuing silence : " + token);
        mQueue.add(new ListEntry(PLAY_SILENCE, token));
    }

    // -----------------------------------------
    // End of public API methods.
    // -----------------------------------------

    // -----------------------------------------
    // Methods for managing the message queue.
    // -----------------------------------------

    /*
     * The MessageLoop is a handler-like implementation that
     * processes messages from a priority queue.
     */
    private final class MessageLoop implements Runnable {
        @Override
        public void run() {
            while (true) {
                ListEntry entry = null;
                try {
                    entry = mQueue.take();
                } catch (InterruptedException ie) {
                    return;
                }

                if (entry.mWhat == SHUTDOWN) {
                    if (DBG) Log.d(TAG, "MessageLoop : Shutting down");
                    return;
                }

                if (DBG) {
                    Log.d(TAG, "MessageLoop : Handling message : " + entry.mWhat
                            + ", seqId : " + entry.mSequenceId);
                }

                setCurrentParams(entry.mMessage);
                handleMessage(entry);
                setCurrentParams(null);
            }
        }
    }
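
    // Note: quit() ends the loop above by enqueueing a SHUTDOWN sentinel
    // rather than by interrupting the thread. Since the sentinel is added at
    // HIGH_PRIORITY, it overtakes any DEFAULT_PRIORITY entries still queued.
    // (An interrupt would also end the loop, because take() would throw
    // InterruptedException.)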

    /*
     * Atomically clear the queue of all messages.
     */
    private synchronized void removeAllMessages() {
        mQueue.clear();
    }

    /*
     * Remove all messages that originate from a given calling app.
     */
    private synchronized void removeMessages(String callingApp) {
        Iterator<ListEntry> it = mQueue.iterator();

        while (it.hasNext()) {
            final ListEntry current = it.next();
            // The null check is to prevent us from removing control messages,
            // such as a shutdown message.
            if (current.mMessage != null &&
                    callingApp.equals(current.mMessage.getCallingApp())) {
                it.remove();
            }
        }
    }

    /*
     * An element of our priority queue of messages. Each message has a priority,
     * and a sequence id (defined by the order of enqueue calls). Among messages
     * with the same priority, messages that were received earlier win out.
     */
    private final class ListEntry implements Comparable<ListEntry> {
        final int mWhat;
        final MessageParams mMessage;
        final int mPriority;
        final long mSequenceId;

        private ListEntry(int what, MessageParams message) {
            this(what, message, DEFAULT_PRIORITY);
        }

        private ListEntry(int what, MessageParams message, int priority) {
            mWhat = what;
            mMessage = message;
            mPriority = priority;
            mSequenceId = mSequenceIdCtr.incrementAndGet();
        }

        @Override
        public int compareTo(ListEntry that) {
            if (that == this) {
                return 0;
            }

            // Note that this is always 0, 1 or -1.
            int priorityDiff = mPriority - that.mPriority;
            if (priorityDiff == 0) {
                // The == case cannot occur, because sequence ids are unique.
                return (mSequenceId < that.mSequenceId) ? -1 : 1;
            }

            return priorityDiff;
        }
    }

    private void setCurrentParams(MessageParams p) {
        if (DBG_THREADING) {
            if (p != null) {
                Log.d(TAG, "Started handling : " + p);
            } else {
                Log.d(TAG, "End handling : " + mCurrentParams);
            }
        }
        mCurrentParams = p;
    }

    private MessageParams getCurrentParams() {
        return mCurrentParams;
    }

    // -----------------------------------------
    // Methods for dealing with individual messages. The methods
    // below do the actual work.
    // -----------------------------------------

    private void handleMessage(ListEntry entry) {
        final MessageParams msg = entry.mMessage;
        if (entry.mWhat == SYNTHESIS_START) {
            handleSynthesisStart(msg);
        } else if (entry.mWhat == SYNTHESIS_DATA_AVAILABLE) {
            handleSynthesisDataAvailable(msg);
        } else if (entry.mWhat == SYNTHESIS_DONE) {
            handleSynthesisDone(msg);
        } else if (entry.mWhat == PLAY_AUDIO) {
            handleAudio(msg);
        } else if (entry.mWhat == PLAY_SILENCE) {
            handleSilence(msg);
        }
    }

    // Currently implemented as blocking the audio playback thread for the
    // specified duration. If a call to stop() is made, the thread
    // unblocks.
    private void handleSilence(MessageParams msg) {
        if (DBG) Log.d(TAG, "handleSilence()");
        SilenceMessageParams params = (SilenceMessageParams) msg;
        params.getDispatcher().dispatchOnStart();
        if (params.getSilenceDurationMs() > 0) {
            params.getConditionVariable().block(params.getSilenceDurationMs());
        }
        params.getDispatcher().dispatchOnDone();
        if (DBG) Log.d(TAG, "handleSilence() done.");
    }
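
    // Note: the early unblock works because stop() (above) opens the
    // ConditionVariable for TYPE_SILENCE tokens, and ConditionVariable#block
    // returns as soon as the variable is opened, even if the requested
    // timeout has not yet elapsed.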

    // Plays back audio from a given URI. No TTS engine involvement here.
    private void handleAudio(MessageParams msg) {
        if (DBG) Log.d(TAG, "handleAudio()");
        AudioMessageParams params = (AudioMessageParams) msg;
        params.getDispatcher().dispatchOnStart();
        // Note that the BlockingMediaPlayer spawns a separate thread.
        //
        // TODO: This can be avoided.
        params.getPlayer().startAndWait();
        params.getDispatcher().dispatchOnDone();
        if (DBG) Log.d(TAG, "handleAudio() done.");
    }

    // Denotes the start of a new synthesis request. We create a new
    // audio track, and prepare it for incoming data.
    //
    // Note that since all TTS synthesis happens on a single thread, we
    // should ALWAYS see the following order :
    //
    // handleSynthesisStart -> handleSynthesisDataAvailable(*) -> handleSynthesisDone
    private void handleSynthesisStart(MessageParams msg) {
        if (DBG) Log.d(TAG, "handleSynthesisStart()");
        final SynthesisMessageParams param = (SynthesisMessageParams) msg;

        // Oops, looks like the engine forgot to call done(). We go through
        // extra trouble to clean the data to prevent the AudioTrack resources
        // from being leaked.
        if (mLastSynthesisRequest != null) {
            Log.e(TAG, "Error : Missing call to done() for request : " +
                    mLastSynthesisRequest);
            handleSynthesisDone(mLastSynthesisRequest);
        }

        mLastSynthesisRequest = param;

        // Create the audio track.
        final AudioTrack audioTrack = createStreamingAudioTrack(param);

        // createStreamingAudioTrack() may return null on failure; guard the
        // debug log so it cannot dereference a null track.
        if (DBG && audioTrack != null) {
            Log.d(TAG, "Created audio track [" + audioTrack.hashCode() + "]");
        }

        param.setAudioTrack(audioTrack);
        msg.getDispatcher().dispatchOnStart();
    }

    // More data available to be flushed to the audio track.
    private void handleSynthesisDataAvailable(MessageParams msg) {
        final SynthesisMessageParams param = (SynthesisMessageParams) msg;
        if (param.getAudioTrack() == null) {
            Log.w(TAG, "Error : null audio track in handleSynthesisDataAvailable : " + param);
            return;
        }

        if (param != mLastSynthesisRequest) {
            Log.e(TAG, "Call to dataAvailable without done() / start()");
            return;
        }

        final AudioTrack audioTrack = param.getAudioTrack();
        final SynthesisMessageParams.ListEntry bufferCopy = param.getNextBuffer();

        if (bufferCopy == null) {
            Log.e(TAG, "No buffers available to play.");
            return;
        }

        int playState = audioTrack.getPlayState();
        if (playState == AudioTrack.PLAYSTATE_STOPPED) {
            if (DBG) Log.d(TAG, "AudioTrack stopped, restarting : " + audioTrack.hashCode());
            audioTrack.play();
        }

        int count = 0;
        while (count < bufferCopy.mBytes.length) {
            // Note that we don't take bufferCopy.mOffset into account because
            // it is guaranteed to be 0.
            //
            // Write only the bytes that remain: passing the full buffer length
            // after a partial write would overrun the array bounds and make
            // write() return an error.
            int written = audioTrack.write(bufferCopy.mBytes, count,
                    bufferCopy.mBytes.length - count);
            if (written <= 0) {
                break;
            }
            count += written;
        }

        param.mBytesWritten += count;
        param.mLogger.onPlaybackStart();
    }

    // Wait for the audio track to stop playing, and then release its resources.
    private void handleSynthesisDone(MessageParams msg) {
        final SynthesisMessageParams params = (SynthesisMessageParams) msg;

        if (DBG) Log.d(TAG, "handleSynthesisDone()");
        final AudioTrack audioTrack = params.getAudioTrack();

        if (audioTrack == null) {
            // There was already a call to handleSynthesisDone for
            // this token.
            return;
        }

        if (params.mBytesWritten < params.mAudioBufferSize) {
            if (DBG) Log.d(TAG, "Stopping audio track to flush audio, state was : " +
                    audioTrack.getPlayState());
            params.mIsShortUtterance = true;
            audioTrack.stop();
        }

        if (DBG) Log.d(TAG, "Waiting for audio track to complete : " + audioTrack.hashCode());
        blockUntilDone(params);
        if (DBG) Log.d(TAG, "Releasing audio track [" + audioTrack.hashCode() + "]");

        // The last call to AudioTrack.write() will return only after
        // all data from the audioTrack has been sent to the mixer, so
        // it's safe to release at this point. Make sure release() and the call
        // that sets the audio track to null are performed atomically.
        synchronized (this) {
            // Never allow the audioTrack to be observed in a state where
            // it is released but non-null. The only case this might happen
            // is in the various stopFoo methods that call AudioTrack#stop from
            // different threads, but they are synchronized on
            // AudioPlaybackHandler#this too.
            audioTrack.release();
            params.setAudioTrack(null);
        }

        if (params.isError()) {
            params.getDispatcher().dispatchOnError();
        } else {
            params.getDispatcher().dispatchOnDone();
        }

        mLastSynthesisRequest = null;
        params.mLogger.onWriteData();
    }

    /**
     * The minimum increment of time to wait for an audio track to finish
     * playing.
     */
    private static final long MIN_SLEEP_TIME_MS = 20;

    /**
     * The maximum increment of time to sleep while waiting for an audio track
     * to finish playing.
     */
    private static final long MAX_SLEEP_TIME_MS = 2500;

    /**
     * The maximum amount of time to wait for an audio track to make progress while
     * it remains in PLAYSTATE_PLAYING. This should never happen in normal usage, but
     * could happen in exceptional circumstances like a media_server crash.
     */
    private static final long MAX_PROGRESS_WAIT_MS = MAX_SLEEP_TIME_MS;

    private static void blockUntilDone(SynthesisMessageParams params) {
        if (params.mAudioTrack == null || params.mBytesWritten <= 0) {
            return;
        }

        if (params.mIsShortUtterance) {
            // In this case we would have called AudioTrack#stop() to flush
            // buffers to the mixer. This makes the playback head position
            // unobservable and notification markers do not work reliably. We
            // have no option but to wait until we think the track would finish
            // playing and release it after.
            //
            // This isn't as bad as it looks because (a) we won't end up waiting
            // for much longer than we should because even at 4kHz mono, a short
            // utterance weighs in at about 2 seconds, and (b) such short utterances
            // are expected to be relatively infrequent and in a stream of utterances
            // this shows up as a slightly longer pause.
            blockUntilEstimatedCompletion(params);
        } else {
            blockUntilCompletion(params);
        }
    }

    private static void blockUntilEstimatedCompletion(SynthesisMessageParams params) {
        final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame;
        // 1000L forces long arithmetic to avoid int overflow on long utterances.
        final long estimatedTimeMs = lengthInFrames * 1000L / params.mSampleRateInHz;

        if (DBG) Log.d(TAG, "About to sleep for: " + estimatedTimeMs + "ms for a short utterance");

        try {
            Thread.sleep(estimatedTimeMs);
        } catch (InterruptedException ie) {
            // Do nothing.
        }
    }
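
    // Worked example for the estimate above (illustrative numbers, not tied
    // to any particular engine): 16-bit mono audio at 22050 Hz has 2 bytes
    // per frame, so if 44100 bytes were written, lengthInFrames is
    // 44100 / 2 = 22050 and estimatedTimeMs is 22050 * 1000 / 22050 = 1000 ms.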

    private static void blockUntilCompletion(SynthesisMessageParams params) {
        final AudioTrack audioTrack = params.mAudioTrack;
        final int lengthInFrames = params.mBytesWritten / params.mBytesPerFrame;

        int previousPosition = -1;
        int currentPosition = 0;
        long blockedTimeMs = 0;

        while ((currentPosition = audioTrack.getPlaybackHeadPosition()) < lengthInFrames &&
                audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) {

            // 1000L forces long arithmetic to avoid int overflow on long utterances.
            final long estimatedTimeMs = ((lengthInFrames - currentPosition) * 1000L) /
                    audioTrack.getSampleRate();
            final long sleepTimeMs = clip(estimatedTimeMs, MIN_SLEEP_TIME_MS, MAX_SLEEP_TIME_MS);

            // Check if the audio track has made progress since the last loop
            // iteration. If not, add in the amount of time that was spent
            // sleeping in the last iteration.
            if (currentPosition == previousPosition) {
                // This works only because the sleep time that would have been calculated
                // would be the same in the previous iteration too.
                blockedTimeMs += sleepTimeMs;
                // If we've taken too long to make progress, bail.
                if (blockedTimeMs > MAX_PROGRESS_WAIT_MS) {
                    Log.w(TAG, "Waited unsuccessfully for " + MAX_PROGRESS_WAIT_MS + "ms " +
                            "for AudioTrack to make progress, aborting.");
                    break;
                }
            } else {
                blockedTimeMs = 0;
            }
            previousPosition = currentPosition;

            if (DBG) {
                Log.d(TAG, "About to sleep for : " + sleepTimeMs + " ms," +
                        " Playback position : " + currentPosition +
                        ", Length in frames : " + lengthInFrames);
            }
            try {
                Thread.sleep(sleepTimeMs);
            } catch (InterruptedException ie) {
                break;
            }
        }
    }

    private static long clip(long value, long min, long max) {
        if (value < min) {
            return min;
        }

        if (value > max) {
            return max;
        }

        return value;
    }

    private static AudioTrack createStreamingAudioTrack(SynthesisMessageParams params) {
        final int channelConfig = getChannelConfig(params.mChannelCount);
        final int sampleRateInHz = params.mSampleRateInHz;
        final int audioFormat = params.mAudioFormat;

        int minBufferSizeInBytes =
                AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
        int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);

        AudioTrack audioTrack = new AudioTrack(params.mStreamType, sampleRateInHz, channelConfig,
                audioFormat, bufferSizeInBytes, AudioTrack.MODE_STREAM);

        if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
            Log.w(TAG, "Unable to create audio track.");
            audioTrack.release();
            return null;
        }

        params.mAudioBufferSize = bufferSizeInBytes;

        setupVolume(audioTrack, params.mVolume, params.mPan);
        return audioTrack;
    }

    static int getChannelConfig(int channelCount) {
        if (channelCount == 1) {
            return AudioFormat.CHANNEL_OUT_MONO;
        } else if (channelCount == 2) {
            return AudioFormat.CHANNEL_OUT_STEREO;
        }

        return 0;
    }
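
    // Note on the 0 return above: it corresponds to AudioFormat.CHANNEL_INVALID,
    // which AudioTrack.getMinBufferSize and the AudioTrack constructor reject.
    // Channel counts other than 1 or 2 are therefore expected to have been
    // validated before they reach this class.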

    private static void setupVolume(AudioTrack audioTrack, float volume, float pan) {
        float vol = clip(volume, 0.0f, 1.0f);
        float panning = clip(pan, -1.0f, 1.0f);

        // Pan is in [-1.0, 1.0]: positive values attenuate the left channel
        // (panning right), negative values attenuate the right channel.
        // For example, pan = 0.5 gives volLeft = vol * 0.5 and volRight = vol.
        float volLeft = vol;
        float volRight = vol;
        if (panning > 0.0f) {
            volLeft *= (1.0f - panning);
        } else if (panning < 0.0f) {
            volRight *= (1.0f + panning);
        }

        if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
        if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
            Log.e(TAG, "Failed to set volume");
        }
    }

    private static float clip(float value, float min, float max) {
        return value > max ? max : (value < min ? min : value);
    }
}