/*
* Copyright 2010-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.amazonaws.mobileconnectors.lex.interactionkit.internal.audio;
import android.content.Context;
import android.media.AudioRecord;
import android.os.SystemClock;
import android.util.Log;
import com.amazonaws.AmazonClientException;
import com.amazonaws.mobileconnectors.lex.interactionkit.internal.audio.encoder.AudioEncoder;
import com.amazonaws.mobileconnectors.lex.interactionkit.internal.vad.VoiceActivityDetector;
import com.amazonaws.mobileconnectors.lex.interactionkit.internal.vad.VoiceActivityDetector.VADState;
import com.google.common.base.Preconditions;
import java.io.IOException;
import java.io.InputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.util.concurrent.TimeUnit;
/**
 * An {@link AudioRecorder} that passes recorded samples through a voice
 * activity detector (VAD), encodes them, and writes the encoded bytes to a
 * piped stream. While recording it reports start of speech, silence, and
 * no-speech / max-speech timeouts to the supplied listener.
 */
public class LexAudioRecorder extends AudioRecorder implements ChunkedAudioSource {

    /**
     * The size of each sample, in bits.
     */
    public static final int SAMPLE_SIZE = 16;

    /**
     * Audio encoder used to compress recorded samples.
     */
    private final AudioEncoder mAudioEncoder;

    /**
     * Voice activity detector instance.
     */
    private final VoiceActivityDetector mVAD;

    /**
     * Timeouts applied while recording; also used to size the stream pipe.
     */
    private final AudioTimeouts mAudioTimeouts;

    /**
     * Time when the recorder started recording.
     */
    private long mRecordStartTime;

    /**
     * Consumer end of the audio pipe, handed out by
     * {@link #getConsumerStream()}.
     */
    private PipedInputStream mConsumerStream;

    /**
     * Producer end of the audio pipe; encoded audio is written here.
     */
    private PipedOutputStream mProducerStream;

    /**
     * The VAD state of the recorder before the next sample is processed.
     */
    private VADState state;

    /**
     * The VAD state of the recorder after a sample has been processed.
     */
    private VADState currentState;

    /**
     * Set once the no-speech timeout callback has been delivered, so it is
     * only delivered once per recording.
     */
    private boolean noSpeechTimeoutFired;

    /**
     * Set once the max-speech timeout callback has been delivered, so it is
     * only delivered once per recording.
     */
    private boolean maxSpeechTimeoutFired;

    /**
     * The time the last state change occurred.
     */
    private long lastEventTime;

    /**
     * Create a new recorder with the default sample rate, channel
     * configuration and audio format.
     *
     * @param context the context used to access the AudioRecord framework.
     * @param audioEncoder the audio encoder to use for compression.
     * @param vad the VAD used for startpointing and endpointing.
     * @param audioTimeouts the audio timeouts used while recording audio.
     * @param recorderPositionNotificationPeriod the notification period for
     *            updating the recorder position in milliseconds.
     * @throws Exception if failed to create audio recorder instance.
     */
    LexAudioRecorder(final Context context, final AudioEncoder audioEncoder,
            final VoiceActivityDetector vad, final AudioTimeouts audioTimeouts,
            final int recorderPositionNotificationPeriod) throws Exception {
        // NOTE(review): getMinBufferSize() is documented to return a size in
        // bytes (or a negative error code), yet it is passed as a sample
        // count here - confirm against the delegate constructor's use.
        this(context, audioEncoder, vad, audioTimeouts,
                AudioRecord.getMinBufferSize(AudioRecorder.DEFAULT_SAMPLE_RATE,
                        AudioRecorder.DEFAULT_CHANNELS,
                        AudioRecorder.DEFAULT_FORMAT),
                recorderPositionNotificationPeriod);
    }

    /**
     * Create a new audio recorder with a custom record buffer size. Used for
     * testing.
     *
     * @param context the Context used to access the AudioRecord framework.
     * @param audioEncoder the audio encoder to use for compression.
     * @param vad the VAD to be used for startpointing and endpointing.
     * @param audioTimeouts the audio timeouts used while recording audio.
     * @param numSamplesPerRead the number of samples to request when recording
     *            audio.
     * @param recorderPositionNotificationPeriod the notification period for
     *            updating the recorder position in milliseconds.
     * @throws NullPointerException if the encoder, VAD or timeouts is null.
     * @throws Exception if failed to create audio recorder instance.
     */
    protected LexAudioRecorder(final Context context, final AudioEncoder audioEncoder,
            final VoiceActivityDetector vad, final AudioTimeouts audioTimeouts,
            final int numSamplesPerRead, final int recorderPositionNotificationPeriod)
            throws Exception {
        // The encoder is validated inside the super(...) call because it is
        // dereferenced there; checking afterwards would be too late.
        // NOTE(review): numSamplesPerRead is not forwarded to the superclass
        // constructor used here - confirm whether that is intended.
        super(context,
                Preconditions.checkNotNull(audioEncoder, "AudioEncoder cannot be null")
                        .getMediaType(),
                recorderPositionNotificationPeriod,
                AudioRecorder.DEFAULT_SAMPLE_RATE, AudioRecorder.DEFAULT_CHANNELS,
                AudioRecorder.DEFAULT_FORMAT);
        mAudioEncoder = audioEncoder;
        mVAD = Preconditions.checkNotNull(vad, "VAD cannot be null");
        mAudioTimeouts = Preconditions.checkNotNull(audioTimeouts, "Audio timeouts cannot be null");
        setupStreamPipe();
    }

    /**
     * Create a new audio recorder with an existing AudioRecord object.
     *
     * @param context the Context used to access the AudioRecord framework.
     * @param audioEncoder the audio encoder to use for compression.
     * @param vad the VAD to be used for startpointing and endpointing.
     * @param audioTimeouts the audio timeouts used while recording audio.
     * @param numSamplesPerRead the number of samples to request when recording
     *            audio.
     * @param recorderPositionNotificationPeriod the notification period for
     *            updating the recorder position in milliseconds.
     * @param audioRecord the AudioRecord to use for audio recording.
     * @throws NullPointerException if the encoder, VAD or timeouts is null.
     * @throws Exception if failed to create audio recorder instance.
     */
    public LexAudioRecorder(final Context context, final AudioEncoder audioEncoder,
            final VoiceActivityDetector vad, final AudioTimeouts audioTimeouts,
            final int numSamplesPerRead, final int recorderPositionNotificationPeriod,
            final AudioRecord audioRecord) throws Exception {
        // Validate the encoder before it is dereferenced for its media type.
        super(context,
                Preconditions.checkNotNull(audioEncoder, "AudioEncoder cannot be null")
                        .getMediaType(),
                numSamplesPerRead, recorderPositionNotificationPeriod, audioRecord);
        mAudioEncoder = audioEncoder;
        mVAD = Preconditions.checkNotNull(vad, "VAD cannot be null");
        mAudioTimeouts = Preconditions.checkNotNull(audioTimeouts, "Audio timeouts cannot be null");
        setupStreamPipe();
    }

    /**
     * Returns the encoder's packet size as the chunk size of this source.
     *
     * @return the chunk size reported by the audio encoder.
     */
    @Override
    public int getChunkSize() {
        return mAudioEncoder.getPacketSize();
    }

    /**
     * Returns the consumer end of the audio pipe.
     *
     * @return the stream from which the encoded audio can be read.
     */
    @Override
    public InputStream getConsumerStream() {
        return mConsumerStream;
    }

    /**
     * Set up the producer and consumer stream. The pipe is sized to hold
     * uncompressed audio for the combined no-speech and max-speech timeouts.
     * Called from the constructors, so an override must not rely on subclass
     * state that is initialized later.
     *
     * @throws AudioSourceException thrown if failed to open consumer stream.
     */
    protected void setupStreamPipe() throws AudioSourceException {
        // Sum in long so two large int timeouts cannot overflow.
        final long noSpeechMillis = mAudioTimeouts.getNoSpeechTimeout();
        final long maxSpeechMillis = mAudioTimeouts.getMaxSpeechTimeout();
        final int pipeSize = computePipeSize(noSpeechMillis + maxSpeechMillis);

        // Set up the audio stream pipe.
        mProducerStream = new PipedOutputStream();
        try {
            mConsumerStream = new PipedInputStream(mProducerStream, pipeSize);
        } catch (final IOException e) {
            throw new AudioSourceException("Error openning consumer stream", e);
        }
    }

    /**
     * Computes the pipe size in bytes for the given total audio duration.
     * The duration is rounded up to whole seconds (at least one second) so
     * that sub-second timeouts never yield a zero-sized pipe, which
     * {@link PipedInputStream} rejects, and the result is capped at
     * {@link Integer#MAX_VALUE}.
     *
     * @param totalAudioMillis the maximum total audio length in milliseconds.
     * @return a strictly positive pipe size in bytes.
     */
    private static int computePipeSize(final long totalAudioMillis) {
        final long millisPerSecond = TimeUnit.SECONDS.toMillis(1);
        final long wholeSeconds = Math.max(1L,
                (totalAudioMillis + millisPerSecond - 1) / millisPerSecond);
        final long bytesPerSecond =
                (long) AudioRecorder.DEFAULT_SAMPLE_RATE * (SAMPLE_SIZE / Byte.SIZE);
        return (int) Math.min(Integer.MAX_VALUE, bytesPerSecond * wholeSeconds);
    }

    /**
     * Resets the VAD state, the timeout flags and the event timestamps, then
     * delegates to the superclass to start recording.
     *
     * @throws Exception when there are problems while recording audio.
     */
    @Override
    public void startRecording() throws Exception {
        state = VADState.NOT_STARTPOINTED;
        noSpeechTimeoutFired = false;
        maxSpeechTimeoutFired = false;
        mRecordStartTime = SystemClock.elapsedRealtime();
        lastEventTime = mRecordStartTime;
        super.startRecording();
    }

    /**
     * Determine whether recording should continue: it continues until the
     * recorder is cancelled or the VAD has endpointed.
     *
     * @return whether recording should continue.
     */
    @Override
    protected boolean isInValidStateToContinueRecording() {
        return !isCancelled() && state != VADState.ENDPOINTED;
    }

    /**
     * Set the current audio recording state to the last committed state.
     */
    @Override
    protected void setPostRecordingFields() {
        currentState = state;
    }

    /**
     * Performs additional audio processing after a sample has been recorded.
     * Runs the samples through the VAD, encodes them and writes them to the
     * producer stream, then invokes the listener callbacks that correspond
     * to the VAD state transition or to an expired timeout.
     *
     * @param numSamplesRead number of samples read.
     * @param buffer stores the audio samples.
     * @param listener listens to audio recording events.
     * @throws AmazonClientException when there is a problem pushing to the
     *             output stream, or when the recorder is in a state from
     *             which no further processing is expected.
     */
    @Override
    protected void postAudioRecordingProcessing(final int numSamplesRead,
            final short[] buffer, final AudioSourceListener listener) throws AmazonClientException {
        if (numSamplesRead > 0) {
            // Process the samples through the VAD and get current VAD state.
            currentState = mVAD.processSamples(buffer, numSamplesRead);
            writeEncodedAudio(buffer, numSamplesRead);
        }

        // Use VAD state to determine which callbacks need to be invoked on
        // the listener. The timeouts used here are also what the pipe size
        // is derived from.
        final long timeSinceLastEvent = SystemClock.elapsedRealtime() - lastEventTime;
        if (state == VADState.NOT_STARTPOINTED) {
            if (currentState == VADState.STARTPOINTED) {
                Log.v(TAG, "Speech detected");
                listener.onBeginningOfSpeech();
            } else if (!noSpeechTimeoutFired
                    && timeSinceLastEvent >= mAudioTimeouts.getNoSpeechTimeout()) {
                Log.i(TAG, "No speech timeout fired");
                noSpeechTimeoutFired = true;
                listener.onNoSpeechTimeout();
            }
        } else if (state == VADState.STARTPOINTED) {
            if (currentState == VADState.ENDPOINTED) {
                Log.v(TAG, "Silence detected");
                listener.onSilenceDetected();
            } else if (!maxSpeechTimeoutFired
                    && timeSinceLastEvent >= mAudioTimeouts.getMaxSpeechTimeout()) {
                Log.i(TAG, "Max speech timeout fired");
                maxSpeechTimeoutFired = true;
                listener.onMaxSpeechTimeout();
            }
        } else {
            // This should never be reached, and if it is, it indicates a
            // logical problem in the recording loop.
            throw new AudioSourceException("Invalid VAD state transition while processing audio");
        }

        // Commit the new state and restart the timeout clock on a change.
        if (state != currentState) {
            lastEventTime = SystemClock.elapsedRealtime();
            state = currentState;
        }
    }

    /**
     * Encodes the given samples and writes the result to the producer stream.
     *
     * @param buffer stores the audio samples.
     * @param numSamplesRead number of valid samples in the buffer.
     * @throws AudioSourceException when writing to the producer stream fails.
     */
    private void writeEncodedAudio(final short[] buffer, final int numSamplesRead)
            throws AudioSourceException {
        // Encode audio for sending to service.
        final byte[] compressedBuffer = mAudioEncoder.encode(buffer, numSamplesRead);
        try {
            mProducerStream.write(compressedBuffer, 0, compressedBuffer.length);
            mProducerStream.flush();
        } catch (final IOException e) {
            throw new AudioSourceException(
                    "Error writing to audio upload output stream", e);
        }
    }

    /**
     * Clean up the resources of components used by the audio recorder. The
     * encoder is closed even when closing the producer stream fails.
     *
     * @throws IOException when there is a problem closing the producer stream.
     */
    @Override
    protected void cleanUpUtilityComponents() throws IOException {
        try {
            mProducerStream.close();
            Log.v(TAG, "Released producer stream");
        } finally {
            mAudioEncoder.close();
            Log.v(TAG, "Clean up audio encoder");
        }
    }

    /**
     * Return the producer stream.
     *
     * @return the producer stream.
     */
    PipedOutputStream getProducerStream() {
        return mProducerStream;
    }

    /**
     * Get the committed state of the VAD.
     *
     * @return the state.
     */
    VADState getState() {
        return state;
    }

    /**
     * Get the current (most recently observed) state of the VAD.
     *
     * @return the current state.
     */
    VADState getCurrentState() {
        return currentState;
    }

    // TODO: Remove setters and replace with constructors.

    /**
     * Set the state.
     *
     * @param testState the state to set to.
     */
    void setState(final VADState testState) {
        state = testState;
    }

    /**
     * Set the current state.
     *
     * @param testCurrentState the state to set to.
     */
    void setCurrentState(final VADState testCurrentState) {
        currentState = testCurrentState;
    }
}