/* * Copyright (C) 2011 Jacquet Wong * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.musicg.api; import com.musicg.math.rank.ArrayRankDouble; import com.musicg.math.statistics.StandardDeviation; import com.musicg.math.statistics.ZeroCrossingRate; import com.musicg.wave.Wave; import com.musicg.wave.WaveHeader; import com.musicg.wave.extension.Spectrogram; /** * Api for detecting different sounds * * @author Jacquet Wong * */ public class DetectionApi { protected WaveHeader waveHeader; protected int fftSampleSize; protected int numFrequencyUnit; protected double unitFrequency; protected double minFrequency, maxFrequency; protected double minIntensity, maxIntensity; protected double minStandardDeviation, maxStandardDeviation; protected int highPass, lowPass; protected int minNumZeroCross, maxNumZeroCross; protected int lowerBoundary, upperBoundary; protected int numRobust; /** * Constructor, support mono Wav only, 4096 sample byte size for 44100Hz * 16bit mono wav * * @param sampleRate * Sample rate of the input audio byte * @param bitsPerSample * Bit size of a sample of the input audio byte */ public DetectionApi(WaveHeader waveHeader) { if (waveHeader.getChannels() == 1) { this.waveHeader = waveHeader; init(); } else { System.err.println("DetectionAPI supports mono Wav only"); } } /** * Initiate the settings for specific sound detection */ protected void init(){ // do nothing, needed to be overrided } /** * Determine the audio bytes contains a specific sound or not * * @param audioBytes * input audio byte * @return */ public boolean isSpecificSound(byte[] audioBytes) { int bytesPerSample = waveHeader.getBitsPerSample() / 8; int numSamples = audioBytes.length / bytesPerSample; // numSamples required to be a power of 2 if (numSamples > 0 && Integer.bitCount(numSamples) == 1) { fftSampleSize = numSamples; numFrequencyUnit = fftSampleSize / 2; // frequency could be caught within the half of nSamples according to Nyquist theory unitFrequency = (double) waveHeader.getSampleRate() / 2 / numFrequencyUnit; // set boundary lowerBoundary = (int) (highPass / unitFrequency); upperBoundary = (int) (lowPass / unitFrequency); // end set boundary Wave wave = new Wave(waveHeader, audioBytes); // audio bytes of this frame short[] amplitudes = wave.getSampleAmplitudes(); // spectrum for the clip Spectrogram spectrogram = wave.getSpectrogram(fftSampleSize, 0); double[][] spectrogramData = spectrogram.getAbsoluteSpectrogramData(); // since fftSampleSize==numSamples, there're only one spectrum which is thisFrameSpectrogramData[0] double[] spectrum = spectrogramData[0]; int frequencyUnitRange = upperBoundary - lowerBoundary + 1; double[] rangedSpectrum = new double[frequencyUnitRange]; System.arraycopy(spectrum, lowerBoundary, rangedSpectrum, 0, rangedSpectrum.length); if (frequencyUnitRange <= spectrum.length) { if (isPassedIntensity(spectrum)){ if (isPassedStandardDeviation(spectrogramData)){ if (isPassedZeroCrossingRate(amplitudes)){ if (isPassedFrequency(rangedSpectrum)){ return true; } } } } /* // run all checking for debug boolean isPassedChecking = true; // rule 1: check the intensity of this frame isPassedChecking &= isPassedIntensity(spectrum); // rule 2: check the frequency of this frame isPassedChecking &= isPassedFrequency(rangedSpectrum); // rule 3: check the zero crossing rate of this frame isPassedChecking &= isPassedZeroCrossingRate(amplitudes); // rule 4: check the standard deviation of this frame with reference of previous frames isPassedChecking &= isPassedStandardDeviation(spectrogramData); System.out.println("Result: " + isPassedChecking + "\n"); return isPassedChecking; // end run all checking for debug */ } else { System.err .println("is error: the wave needed to be higher sample rate"); } } else { System.out.println("The sample size must be a power of 2"); } return false; } protected void normalizeSpectrogramData(double[][] spectrogramData) { // normalization of absoultSpectrogram // set max and min amplitudes double maxAmp = Double.MIN_VALUE; double minAmp = Double.MAX_VALUE; for (int i = 0; i < spectrogramData.length; i++) { for (int j = 0; j < spectrogramData[i].length; j++){ if (spectrogramData[i][j] > maxAmp) { maxAmp = spectrogramData[i][j]; } else if (spectrogramData[i][j] < minAmp) { minAmp = spectrogramData[i][j]; } } } // end set max and min amplitudes // normalization // avoiding divided by zero double minValidAmp = 0.00000000001F; if (minAmp == 0) { minAmp = minValidAmp; } double diff = Math.log10(maxAmp / minAmp); // perceptual difference for (int i = 0; i < spectrogramData.length; i++) { for (int j = 0; j < spectrogramData[i].length; j++) { if (spectrogramData[i][j] < minValidAmp) { spectrogramData[i][j] = 0; } else { spectrogramData[i][j] = (Math.log10(spectrogramData[i][j] / minAmp)) / diff; } } } // end normalization } protected boolean isPassedStandardDeviation(double[][] spectrogramData){ // normalize the spectrogramData (with all frames in the spectrogram) normalizeSpectrogramData(spectrogramData); // analyst data in this frame // since fftSampleSize==numSamples, there're only one spectrum which is spectrogramData[last] double[] spectrum = spectrogramData[spectrogramData.length - 1]; // find top most robust frequencies in this frame double[] robustFrequencies = new double[numRobust]; ArrayRankDouble arrayRankDouble = new ArrayRankDouble(); double nthValue = arrayRankDouble.getNthOrderedValue(spectrum, numRobust, false); // end analyst data in this frame int count = 0; for (int i = 0; i < spectrum.length; i++) { if (spectrum[i] >= nthValue) { robustFrequencies[count++] = spectrum[i]; if (count >= numRobust) { break; } } } // end find top most robust frequencies StandardDeviation standardDeviation = new StandardDeviation(); standardDeviation.setValues(robustFrequencies); double sd = standardDeviation.evaluate(); // range of standard deviation boolean result = (sd >= minStandardDeviation && sd <= maxStandardDeviation); //System.out.println("sd: " + sd + " " + result); return result; } protected boolean isPassedFrequency(double[] spectrum){ // find the robust frequency ArrayRankDouble arrayRankDouble = new ArrayRankDouble(); double robustFrequency = arrayRankDouble.getMaxValueIndex(spectrum) * unitFrequency; // frequency of the sound should not be too low or too high boolean result = (robustFrequency >= minFrequency && robustFrequency <= maxFrequency); //System.out.println("freq: " + robustFrequency + " " + result); return result; } protected boolean isPassedIntensity(double[] spectrum){ // get the average intensity of the signal double intensity = 0; for (int i = 0; i < spectrum.length; i++) { intensity += spectrum[i]; } intensity /= spectrum.length; // end get the average intensity of the signal // intensity of the whistle should not be too soft boolean result = (intensity > minIntensity && intensity <= maxIntensity); //System.out.println("intensity: " + intensity + " " + result); return result; } protected boolean isPassedZeroCrossingRate(short[] amplitudes){ ZeroCrossingRate zcr = new ZeroCrossingRate(amplitudes, 1); int numZeroCrosses = (int) zcr.evaluate(); // different sound has different range of zero crossing value // when lengthInSecond=1, zero crossing rate is the num // of zero crosses boolean result = (numZeroCrosses >= minNumZeroCross && numZeroCrosses <= maxNumZeroCross); //System.out.println("zcr: " + numZeroCrosses + " " +result); return result; } }