package com.darkprograms.speech.microphone;
import javax.sound.sampled.AudioFileFormat;
import com.darkprograms.speech.util.*;
/********************************************************************************************
* Microphone Analyzer class, detects pitch and volume while extending the microphone class.
* Implemented as a precursor to a Voice Activity Detection (VAD) algorithm.
* Currently can be used for audio data analysis.
* Dependencies: FFT.java & Complex.java. Both found in the utility package.
* @author Aaron Gokaslan
********************************************************************************************/
public class MicrophoneAnalyzer extends Microphone {

    /**
     * Constructor
     * @param fileType The file type you want to save in. FLAC recommended.
     */
    public MicrophoneAnalyzer(AudioFileFormat.Type fileType){
        super(fileType);
    }

    /**
     * Gets the volume of the microphone input over a 100ms interval.
     * Allow 100ms for this method to run in your code, or use
     * {@link #getAudioVolume(int)} to specify a smaller interval.
     * @return The volume of the microphone input or -1 if data-line is not available
     */
    public int getAudioVolume(){
        return getAudioVolume(100);
    }

    /**
     * Gets the volume of the microphone input
     * @param interval The length of time you would like to calculate the volume over in milliseconds.
     * @return The volume of the microphone input or -1 if data-line is not available.
     */
    public int getAudioVolume(int interval){
        return calculateAudioVolume(this.getNumOfBytes(interval/1000d));
    }

    /**
     * Gets the volume of microphone input
     * @param numOfBytes The number of bytes you want for volume interpretation
     * @return The volume over the specified number of bytes or -1 if data-line is unavailable.
     */
    private int calculateAudioVolume(int numOfBytes){
        byte[] data = getBytes(numOfBytes);
        if(data == null){
            return -1;
        }
        return calculateRMSLevel(data);
    }

    /**
     * Calculates the volume of AudioData which may be buffered data from a data-line.
     * The value is the root-mean-square deviation of the bytes from their mean,
     * rounded to the nearest integer. Each byte is treated as one signed value.
     * @param audioData The byte[] you want to determine the volume of. Must not be null.
     * @return the calculated volume of audioData, or 0 if audioData is empty
     */
    public static int calculateRMSLevel(byte[] audioData){
        if(audioData.length == 0){
            return 0;//Nothing to measure; also avoids dividing by zero below.
        }
        long sum = 0;
        for(byte sample : audioData){
            sum += sample;
        }
        //Floating-point division: integer division would truncate the mean
        //and inflate every deviation computed from it.
        double mean = (double)sum / audioData.length;
        double sumOfSquares = 0d;
        for(byte sample : audioData){
            double deviation = sample - mean;
            sumOfSquares += deviation * deviation;
        }
        double meanSquare = sumOfSquares / audioData.length;
        return (int)(Math.sqrt(meanSquare) + 0.5);
    }

    /**
     * Returns the number of bytes over an interval, useful when figuring out how long to record.
     * @param seconds The length in seconds
     * @return the number of bytes the microphone will save.
     */
    public int getNumOfBytes(int seconds){
        return getNumOfBytes((double)seconds);
    }

    /**
     * Returns the number of bytes over an interval, useful when figuring out how long to record.
     * Computed as seconds * sample rate * frame size, rounded to the nearest integer.
     * @param seconds The length in seconds
     * @return the number of bytes the microphone will output over the specified time.
     */
    public int getNumOfBytes(double seconds){
        return (int)(seconds*getAudioFormat().getSampleRate()*getAudioFormat().getFrameSize()+.5);
    }

    /**
     * Reads up to the specified number of bytes from the data-line.
     * The request is rounded down to a whole number of frames because the
     * data-line rejects reads that are not frame aligned, and the result is
     * trimmed to the bytes actually delivered so that unread trailing zeros
     * do not distort later calculations.
     * @param numOfBytes The maximum length of the returned array.
     * @return The bytes that were read, or null if the data-line is unavailable.
     */
    private byte[] getBytes(int numOfBytes){
        if(getTargetDataLine() == null){
            return null;//If data cannot be read, returns a null array.
        }
        int frameSize = getAudioFormat().getFrameSize();
        int alignedLength = numOfBytes;
        if(frameSize > 1){
            alignedLength -= alignedLength % frameSize;
        }
        byte[] data = new byte[alignedLength];
        int bytesRead = this.getTargetDataLine().read(data, 0, alignedLength);
        if(bytesRead < alignedLength){
            //Short read (e.g. the line was stopped or closed): keep only real data.
            return java.util.Arrays.copyOf(data, Math.max(bytesRead, 0));
        }
        return data;
    }

    /**
     * Calculates the fundamental frequency. In other words, it calculates pitch,
     * except pitch is far more subjective and subtle. Also note, that readings may occasionally,
     * be in error due to the complex nature of sound. This feature is in Beta
     * @return The frequency of the sound in Hertz, -1 if the data-line is not available,
     * or -666 if the analysis failed with an exception.
     */
    public int getFrequency(){
        try {
            return getFrequency(2048);
        } catch (Exception e) {
            //Not expected to happen; the sentinel is kept for backward compatibility.
            return -666;
        }
    }

    /**
     * Calculates the frequency based off of the number of bytes.
     * CAVEAT: THE NUMBER OF BYTES MUST BE A MULTIPLE OF 2!!!
     * @param numOfBytes The number of bytes which must be a multiple of 2!!!
     * @return The calculated frequency in Hertz or -1 if data-line is not available.
     * @throws Exception declared for backward compatibility with existing callers.
     */
    public int getFrequency(int numOfBytes) throws Exception{
        if(getTargetDataLine() == null){
            return -1;
        }
        //One spare byte compensates for the length formula used by bytesToDoubleArray.
        byte[] data = new byte[numOfBytes+1];
        this.getTargetDataLine().read(data, 0, numOfBytes);
        return getFrequency(data);
    }

    /**
     * Calculates the frequency based off of the byte array,
     * @param bytes The audioData you want to analyze
     * @return The calculated frequency in Hertz.
     */
    private int getFrequency(byte[] bytes){//This method requires an AudioFormat and cannot be static.
        double[] audioData = this.bytesToDoubleArray(bytes);
        Complex[] complex = new Complex[audioData.length];
        for(int i = 0; i<complex.length; i++){
            complex[i] = new Complex(audioData[i], 0);
        }
        Complex[] fftTransformed = FFT.fft(complex);
        return calculateFundamentalFrequency(fftTransformed);
    }

    /**
     * Iterates through the first half of the transformed data to calculate the frequency.
     * This data is only as accurate as the bin size. (See getFFTBinSize(int))
     * Fundamental Frequency = index of max magnitude (that isn't a harmonic) * bin size
     * @param fftData The data you want to analyze
     * @return The frequency in Hertz, or 0 if no bin has a magnitude above zero.
     */
    private int calculateFundamentalFrequency(Complex[] fftData){
        int index = 0;
        double max = 0d;//Magnitudes are compared against zero, so silence never selects a bin.
        for(int i = 0; i<fftData.length/2; i++){
            double magnitude = fftData[i].getMagnitude();
            if(magnitude>max && !isHarmonic(i,index)){
                max = magnitude;
                index = i;
            }
        }
        if(index == 0){
            return 0;//Silence, DC only, or empty input; also avoids a zero-length bin size.
        }
        //Multiply by the exact bin width and round once, so the rounding error
        //does not grow with the index.
        return (int)(index*getFFTBinSize(fftData.length)+.5);
    }

    /**
     * Determines whether or not a specific index constitutes a harmonic of a previous instance.
     * Science: A harmonic frequency is a multiple of the fundamental frequency caused by interference.
     * Note: Indices of 2 or lower are never treated as such since their frequency is so low.
     * @param currentIndex The suspected harmonic frequency
     * @param proposedIndex The suspected fundamental frequency
     * @return True if it is a harmonic, false if it's not.
     */
    private boolean isHarmonic(int currentIndex, int proposedIndex){
        return (currentIndex>2 && proposedIndex>2 && currentIndex%proposedIndex==0);
    }

    /**
     * Calculates the FFT bin size based off the length of the array
     * Each FFT bin represents the range of frequencies treated as one.
     * For example, if the bin size is 5 then the algorithm is precise to within 5hz.
     * Precondition: length cannot be 0.
     * @param fftDataLength The length of the array used to feed the FFT algorithm
     * @return FFT bin size in Hertz (sample rate divided by the array length, not rounded)
     */
    private double getFFTBinSize(int fftDataLength){
        return (double)getAudioFormat().getSampleRate()/fftDataLength;
    }

    /**
     * Converts bytes from a TargetDataLine into a double[] allowing the information to be read.
     * Each sample is assembled with its first byte as the least significant one; only the
     * last byte of a multi-byte sample keeps its sign. The value is divided by 32768 and
     * multiplied by the amplification factor.
     * NOTE: The returned array has bufferData.length - bytesPerSample + 1 entries, but only
     * one entry per sample is filled in. For multi-byte samples the remaining tail stays at 0.
     * Don't expect the arrays to be the same length!
     * @param bufferData The buffer read in from the target data line
     * @return The double[] that the buffer has been converted into.
     */
    private double[] bytesToDoubleArray(byte[] bufferData){
        final int bytesRecorded = bufferData.length;
        final int bytesPerSample = getAudioFormat().getSampleSizeInBits()/8;
        final double amplification = 100.0; // choose a number as you like
        double[] micBufferData = new double[bytesRecorded - bytesPerSample +1];
        for (int index = 0, floatIndex = 0; index < bytesRecorded - bytesPerSample + 1; index += bytesPerSample, floatIndex++) {
            double sample = 0;
            for (int b = 0; b < bytesPerSample; b++) {
                int v = bufferData[index + b];
                //Mask every byte except the most significant one, which carries the sign.
                if (b < bytesPerSample - 1 || bytesPerSample == 1) {
                    v &= 0xFF;
                }
                sample += v << (b * 8);
            }
            double sample32 = amplification * (sample / 32768.0);
            micBufferData[floatIndex] = sample32;
        }
        return micBufferData;
    }
}