/* * Copyright 2010-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/apache2.0 * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.mobileconnectors.lex.interactionkit.internal.audio; import android.content.Context; import android.media.AudioRecord; import android.os.SystemClock; import android.util.Log; import com.amazonaws.AmazonClientException; import com.amazonaws.mobileconnectors.lex.interactionkit.internal.audio.encoder.AudioEncoder; import com.amazonaws.mobileconnectors.lex.interactionkit.internal.vad.VoiceActivityDetector; import com.amazonaws.mobileconnectors.lex.interactionkit.internal.vad.VoiceActivityDetector.VADState; import com.google.common.base.Preconditions; import java.io.IOException; import java.io.InputStream; import java.io.PipedInputStream; import java.io.PipedOutputStream; import java.util.concurrent.TimeUnit; /** * An AudioRecorder used to record audio and write audio data to an OutputStream * that also supports voice activity detection and timeouts. */ public class LexAudioRecorder extends AudioRecorder implements ChunkedAudioSource { /** * The size of the each sample in bit. */ public static final int SAMPLE_SIZE = 16; /** * Audio encoder for compression. */ private final AudioEncoder mAudioEncoder; /** * Voice Activity Detector instance. */ private final VoiceActivityDetector mVAD; /** * Stream timeout. */ private final AudioTimeouts mAudioTimeouts; /** * Time when the recorder started recording. */ private long mRecordStartTime; /** * Stream to be read by wake word service. */ private PipedInputStream mConsumerStream; /** * Stream used to output recorded audio. */ private PipedOutputStream mProducerStream; /** * The state of the audio recorder before recording the next sample. */ private VADState state; /** * The state of the audio recorder after recording a sample. */ private VADState currentState; /** * Fired when no speech have been detected for the timeout duration set. */ private boolean noSpeechTimeoutFired; /** * Fired when speech is ongoing for longer than the timeout duration set. */ private boolean maxSpeechTimeoutFired; /** * The time the last state event occurred. */ private long lastEventTime; /** * Create a new recorder with given context, media type, sample rate, * channel configuration, audio format, buffer size. VAD and stream timeout. * * @param context the context used to access the AudioRecord framework. * @param audioEncoder the audio encoder to use for compression. * @param vad the VAD used for startpointing and endpointing. * @param audioTimeouts the audio timeouts used while recording audio. * @param recorderPositionNotificationPeriod the notification period for * updating the recorder position in milliseconds. * @throws Exception if failed to create audio recorder instance. */ LexAudioRecorder(final Context context, final AudioEncoder audioEncoder, final VoiceActivityDetector vad, final AudioTimeouts audioTimeouts, final int recorderPositionNotificationPeriod) throws Exception { this(context, audioEncoder, vad, audioTimeouts, AudioRecord.getMinBufferSize(AudioRecorder.DEFAULT_SAMPLE_RATE, AudioRecorder.DEFAULT_CHANNELS, AudioRecorder.DEFAULT_FORMAT), recorderPositionNotificationPeriod); } /** * Create a new audio recorder with a custom record buffer size. Used for * testing. * * @param context the Context used to access the AudioRecord framework. * @param audioEncoder the audio encoder to use for compression. * @param vad the VAD to be used for startpointing and endpointing. * @param audioTimeouts the audio timeouts used while recording audio. * @param numSamplesPerRead the number of samples to request when recording * audio. * @param recorderPositionNotificationPeriod the notification period for * updating the recorder position in milliseconds. * @throws Exception if failed to create audio recorder instance. */ protected LexAudioRecorder(final Context context, final AudioEncoder audioEncoder, final VoiceActivityDetector vad, final AudioTimeouts audioTimeouts, final int numSamplesPerRead, final int recorderPositionNotificationPeriod) throws Exception { super(context, audioEncoder.getMediaType(), recorderPositionNotificationPeriod, AudioRecorder.DEFAULT_SAMPLE_RATE, AudioRecorder.DEFAULT_CHANNELS, AudioRecorder.DEFAULT_FORMAT); mAudioEncoder = Preconditions.checkNotNull(audioEncoder, "AudioEncoder cannot be null"); mVAD = Preconditions.checkNotNull(vad, "VAD cannot be null"); mAudioTimeouts = Preconditions.checkNotNull(audioTimeouts, "Audio timeouts cannot be null"); setupStreamPipe(); } /** * Create a new audio recorder with an existing AudioRecord object. * * @param context the Context used to access the AudioRecord framework. * @param audioEncoder the audio encoder to use for compression. * @param vad the VAD to be used for startpointing and endpointing. * @param audioTimeouts the audio timeouts used while recording audio. * @param numSamplesPerRead the number of samples to request when recording * audio. * @param recorderPositionNotificationPeriod the notification period for * updating the recorder position in milliseconds. * @param audioRecord the AudioRecord to use for audio recording. * @throws Exception if failed to create audio recorder instance. */ public LexAudioRecorder(final Context context, final AudioEncoder audioEncoder, final VoiceActivityDetector vad, final AudioTimeouts audioTimeouts, final int numSamplesPerRead, final int recorderPositionNotificationPeriod, final AudioRecord audioRecord) throws Exception { super(context, audioEncoder.getMediaType(), numSamplesPerRead, recorderPositionNotificationPeriod, audioRecord); mAudioEncoder = Preconditions.checkNotNull(audioEncoder, "AudioEncoder cannot be null"); mVAD = Preconditions.checkNotNull(vad, "VAD cannot be null"); mAudioTimeouts = Preconditions.checkNotNull(audioTimeouts, "Audio timeouts cannot be null"); setupStreamPipe(); } @Override public int getChunkSize() { return mAudioEncoder.getPacketSize(); } @Override public InputStream getConsumerStream() { return mConsumerStream; } /** * Set up the producer and consumer stream. * * @throws AudioSourceException thrown if failed to open consumer stream. */ protected void setupStreamPipe() throws AudioSourceException { // Calculate pipe size. final int maxTotalAudioLengthInMills = mAudioTimeouts.getNoSpeechTimeout() + mAudioTimeouts.getMaxSpeechTimeout(); final int pipeSize = AudioRecorder.DEFAULT_SAMPLE_RATE * (int) TimeUnit.MILLISECONDS.toSeconds(maxTotalAudioLengthInMills) * (SAMPLE_SIZE / Byte.SIZE); // Set up the audio stream pipe. mProducerStream = new PipedOutputStream(); try { mConsumerStream = new PipedInputStream(mProducerStream, pipeSize); } catch (final IOException e) { throw new AudioSourceException("Error openning consumer stream", e); } } /** * Method to setup, start the recorder, and read data. When silence is * detected or the recorder is stopped by the user, clean up resources. * * @throws Exception when there are problems while recording audio. */ @Override public void startRecording() throws Exception { state = VADState.NOT_STARTPOINTED; noSpeechTimeoutFired = false; maxSpeechTimeoutFired = false; mRecordStartTime = SystemClock.elapsedRealtime(); lastEventTime = mRecordStartTime; super.startRecording(); } /** * Determine whether recording should continue. * * @return whether recording should continue. */ @Override protected boolean isInValidStateToContinueRecording() { return !isCancelled() && state != VADState.ENDPOINTED; } /** * Set the current audio recording state. */ @Override protected void setPostRecordingFields() { currentState = state; } /** * Performs additional audio processing after a sample has been recorded. * Compress audio samples and send to output stream. Update recorder state * based on audio sample recorded. * * @param numSamplesRead number of samples read. * @param buffer stores the audio samples. * @param listener listens to audio recording events. * @throws BluefrontAndroidException when there is a problem pushing to * output stream. */ @Override protected void postAudioRecordingProcessing(final int numSamplesRead, final short[] buffer, final AudioSourceListener listener) throws AmazonClientException { if (numSamplesRead > 0) { // Process the samples through the VAD and get current VAD state. currentState = mVAD.processSamples(buffer, numSamplesRead); // Encode audio for sending to service. final byte[] compressedBuffer = mAudioEncoder.encode(buffer, numSamplesRead); try { mProducerStream.write(compressedBuffer, 0, compressedBuffer.length); mProducerStream.flush(); } catch (final IOException e) { throw new AudioSourceException( "Error writing to audio upload output stream", e); } } // Use VAD state to determine which callbacks need to be invoked on the // listener. // And also the logic here about timeouts will also impact the pipe // size. final long timeSinceLastEvent = SystemClock.elapsedRealtime() - lastEventTime; if (state == VADState.NOT_STARTPOINTED) { if (currentState == VADState.STARTPOINTED) { Log.v(TAG, "Speech detected"); listener.onBeginningOfSpeech(); } else if (!noSpeechTimeoutFired && timeSinceLastEvent >= mAudioTimeouts.getNoSpeechTimeout()) { Log.i(TAG, "No speech timeout fired"); noSpeechTimeoutFired = true; listener.onNoSpeechTimeout(); } } else if (state == VADState.STARTPOINTED) { if (currentState == VADState.ENDPOINTED) { Log.v(TAG, "Silence detected"); listener.onSilenceDetected(); } else if (!maxSpeechTimeoutFired && timeSinceLastEvent >= mAudioTimeouts.getMaxSpeechTimeout()) { Log.i(TAG, "Max speech timeout fired"); maxSpeechTimeoutFired = true; listener.onMaxSpeechTimeout(); } } else { // This should never be reached, and if it is, it indicates a // logical problem in the while loop. throw new AudioSourceException("Invalid VAD state transition while processing audio"); } // Update state if the endpointer state has changed. if (state != currentState) { lastEventTime = SystemClock.elapsedRealtime(); state = currentState; } } /** * Clean up the resources of components used by the audio recorder. * * @throws IOException when there is a problem closing the input stream. */ @Override protected void cleanUpUtilityComponents() throws IOException { mProducerStream.close(); Log.v(TAG, "Released producer stream"); mAudioEncoder.close(); Log.v(TAG, "Clean up audio encoder"); } /** * Return the producer stream. * * @return the producer stream. */ PipedOutputStream getProducerStream() { return mProducerStream; } /** * Get the state of the VAD. * * @return the state. */ VADState getState() { return state; } /** * Get the current state of the VAD. * * @return the current state. */ VADState getCurrentState() { return currentState; } // TODO: Remove setters and replace with constructors. /** * Set the state. * * @param testState the state to set to. */ void setState(final VADState testState) { state = testState; } /** * Set the current state. * * @param testCurrentState the state to set to. */ void setCurrentState(final VADState testCurrentState) { currentState = testCurrentState; } }