/*
* Copyright 1999-2004 Carnegie Mellon University.
* Portions Copyright 2002-2004 Sun Microsystems, Inc.
* Portions Copyright 2002-2004 Mitsubishi Electric Research Laboratories.
* All Rights Reserved. Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.window;
import edu.cmu.sphinx.frontend.*;
import edu.cmu.sphinx.frontend.endpoint.*;
import edu.cmu.sphinx.frontend.util.DataUtil;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Double;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
* Slices up a Data object into a number of overlapping windows (usually referred to as "frames" in the speech world). In
* order to minimize the signal discontinuities at the boundaries of each frame, we multiply each frame with a raised
* cosine windowing function. Moreover, the system uses overlapping windows to capture information that may occur at the
* window boundaries. These events would not be well represented if the windows were juxtaposed. <p> The number of
* resulting windows depends on the {@link #PROP_WINDOW_SIZE_MS window size} and the {@link #PROP_WINDOW_SHIFT_MS window
* shift} (commonly known as frame shift in speech world). Figure 1 shows the relationship between the original data
* stream, the window size, the window shift, and the windows returned. <p> <img alt="Framing" src="doc-files/framing.jpg">
* <br><b>Figure 1: Relationship between original data, window size, window shift, and the windows returned.</b> <p> The
* raised cosine windowing function will be applied to each such window. Since the {@link #getData()} method returns a
* window, and multiple windows are created for each Data object, this is a 1-to-many processor. Also note that the
* returned windows should have the same number of data points as the windowing function. <p> The applied windowing
* function, <i>W(n)</i>, of length <i>N</i> (the window size), is given by the following formula:
* <pre>
* W(n) = (1-a) - (a * cos((2 * Math.PI * n)/(N - 1)))
* </pre>
* where <b>a</b> is commonly known as the "alpha" value. This variable can be set by the user using the property
* defined by {@link #PROP_ALPHA}. Please follow the links to see the constant field values. Some values of alpha
* receive special names, since they are used so often. A value of 0.46 for the alpha results in a window named Hamming
* window. A value of 0.5 results in the Hanning window. And a value of 0 results in the Rectangular window. The default
* for this system is the Hamming window (alpha 0.46). Figure 2 below shows the Hamming window function (a =
* 0.46), using our default window size of 25.625 ms and assuming a sample rate of 16kHz, thus yielding 410 samples per
* window. <p> <img alt="Hamming window" src="doc-files/hamming-window.gif"> <br><b>Figure 2: The Hamming window function.</b>
*
* @see Data
*/
public class RaisedCosineWindower extends BaseDataProcessor {
/** The property for window size in milliseconds. */
@S4Double(defaultValue = 25.625)
public static final String PROP_WINDOW_SIZE_MS = "windowSizeInMs";
private float windowSizeInMs;
/** The property for window shift in milliseconds, which has a default value of 10F. */
@S4Double(defaultValue = 10.0)
public static final String PROP_WINDOW_SHIFT_MS = "windowShiftInMs";
private float windowShiftInMs;
/** The property for the alpha value of the Window, which is the value for the RaisedCosineWindow. */
@S4Double(defaultValue = 0.46)
public static final String PROP_ALPHA = "alpha";
private double alpha;
// required to access the DataStartSignal-properties
public static final String WINDOW_SHIFT_SAMPLES = "windowSize";
public static final String WINDOW_SIZE_SAMPLES = "windowShift";
private double[] cosineWindow; // the raised consine window
private int windowShift; // the window size
private List<Data> outputQueue; // cache for output windows
private DoubleBuffer overflowBuffer; // cache for overlapped audio regions
private long currentFirstSampleNumber;
private int sampleRate;
public RaisedCosineWindower( double alpha, float windowSizeInMs, float windowShiftInMs ) {
initLogger();
this.alpha = alpha;
this.windowSizeInMs = windowSizeInMs;
this.windowShiftInMs = windowShiftInMs;
}
public RaisedCosineWindower( ) {
}
@Override
public void newProperties(PropertySheet ps) throws PropertyException {
super.newProperties(ps);
alpha = ps.getDouble(PROP_ALPHA);
windowSizeInMs = ps.getFloat(PROP_WINDOW_SIZE_MS);
windowShiftInMs = ps.getFloat(PROP_WINDOW_SHIFT_MS);
}
@Override
public void initialize() {
super.initialize();
// createWindow();
outputQueue = new LinkedList<Data>();
}
/**
* Creates the Hamming Window.
*
* @param sampleRate
*/
private void createWindow(int sampleRate) {
if (cosineWindow != null && sampleRate == this.sampleRate) {
return;
}
this.sampleRate = sampleRate;
int windowSize = DataUtil.getSamplesPerWindow(sampleRate, windowSizeInMs);
cosineWindow = new double[windowSize];
windowShift = DataUtil.getSamplesPerShift(sampleRate, windowShiftInMs);
if (cosineWindow.length > 1) {
double oneMinusAlpha = (1 - alpha);
for (int i = 0; i < cosineWindow.length; i++) {
cosineWindow[i] = oneMinusAlpha -
alpha * Math.cos(2 * Math.PI * i / (cosineWindow.length - 1.0));
}
}
overflowBuffer = new DoubleBuffer(windowSize);
}
/**
* Returns the next Data object, which is usually a window of the input Data, with the windowing function applied to
* it.
*
* @return the next available Data object, returns null if no Data object is available
* @throws DataProcessingException if a data processing error occurred
* @see Data
*/
@Override
public Data getData() throws DataProcessingException {
if (outputQueue.isEmpty()) {
Data input = getPredecessor().getData();
if (input != null) {
if (input instanceof DoubleData) {
DoubleData data = (DoubleData) input;
// System.err.print("to windower: ");
// System.err.println(Arrays.toString(data.getValues()));
if (currentFirstSampleNumber == -1) {
currentFirstSampleNumber = data.getFirstSampleNumber();
}
// should not be necessary if all DataProcessor would forward Signals. Unfortunately this
// is currently not the case.
createWindow(data.getSampleRate());
// process the Data, and output the windows
process(data);
} else {
if (input instanceof DataStartSignal) {
DataStartSignal startSignal = (DataStartSignal) input;
createWindow(startSignal.getSampleRate());
// attach the frame-length and the shift-length to the start-signal to allow
// detection of incorrect frontend settings
Map<String, Object> props = startSignal.getProps();
props.put(WINDOW_SHIFT_SAMPLES, windowShift);
props.put(WINDOW_SIZE_SAMPLES, cosineWindow.length);
// reset the current first sample number
currentFirstSampleNumber = -1;
} else if (input instanceof SpeechStartSignal) {
// reset the current first sample number
currentFirstSampleNumber = -1;
} else if (input instanceof DataEndSignal || input instanceof SpeechEndSignal) {
// end of utterance handling
processUtteranceEnd();
}
outputQueue.add(input);
}
}
}
if (!outputQueue.isEmpty()) {
Data output = outputQueue.remove(0);
if (output instanceof DoubleData) {
assert ((DoubleData) output).getValues().length ==
cosineWindow.length;
}
return output;
} else {
return null;
}
}
/**
* Applies the Windowing to the given Data. The resulting windows are cached in the outputQueue.
*
* @param input the input Data object
* @throws DataProcessingException if a data processing error occurs
*/
private void process(DoubleData input) throws DataProcessingException {
double[] in = input.getValues();
int length = overflowBuffer.getOccupancy() + in.length;
List<DoubleData> dataList = new LinkedList<DoubleData>();
dataList.add(input);
Data utteranceEnd = null;
// read in more Data if we have under one window's length of data
while (length < cosineWindow.length) {
Data next = getPredecessor().getData();
if (next instanceof DoubleData) {
dataList.add((DoubleData) next);
length += ((DoubleData) next).getValues().length;
} else {
if (next instanceof DataEndSignal || next instanceof SpeechEndSignal) {
utteranceEnd = next;
break;
}
outputQueue.add(next);
}
}
double[] allSamples = in;
// prepend overflow samples
if (length != in.length) {
allSamples = new double[length];
// copy overflow samples to allSamples buffer
System.arraycopy(overflowBuffer.getBuffer(), 0, allSamples, 0, overflowBuffer.getOccupancy());
int start = overflowBuffer.getOccupancy();
// copy input samples to allSamples buffer
for (DoubleData aDataList : dataList) {
double[] samples = aDataList.getValues();
System.arraycopy(samples, 0, allSamples, start, samples.length);
start += samples.length;
}
}
// apply Hamming window
int residual = applyRaisedCosineWindow(allSamples, length);
// save elements that also belong to the next window
overflowBuffer.reset();
if (length - residual > 0) {
overflowBuffer.append(allSamples, residual, length - residual);
}
if (utteranceEnd != null) {
// end of utterance handling
processUtteranceEnd();
outputQueue.add(utteranceEnd);
}
}
/**
* What happens when an DataEndSignal is received. Basically pads up to a window of the overflow buffer with zeros,
* and then apply the Hamming window to it. Checks if buffer has data.
*/
private void processUtteranceEnd() {
if (overflowBuffer.getOccupancy() > 0) {
overflowBuffer.padWindow(cosineWindow.length);
applyRaisedCosineWindow
(overflowBuffer.getBuffer(), cosineWindow.length);
overflowBuffer.reset();
}
}
/**
* Applies the Hamming window to the given double array. The windows are added to the output queue. Returns the
* index of the first array element of next window that is not produced because of insufficient data.
*
* @param in the audio data to apply window and the Hamming window
* @param length the number of elements in the array to apply the RaisedCosineWindow
* @return the index of the first array element of the next window
*/
private int applyRaisedCosineWindow(double[] in, int length) {
int windowCount;
// if no windows can be created but there is some data,
// pad it with zeros
if (length < cosineWindow.length) {
double[] padded = new double[cosineWindow.length];
System.arraycopy(in, 0, padded, 0, length);
in = padded;
windowCount = 1;
} else {
windowCount = getWindowCount(length, cosineWindow.length, windowShift);
}
// create all the windows at once, not individually, saves time
double[][] windows = new double[windowCount][cosineWindow.length];
int windowStart = 0;
for (int i = 0; i < windowCount; windowStart += windowShift, i++) {
double[] myWindow = windows[i];
// apply the Hamming Window function to the window of data
for (int w = 0, s = windowStart; w < myWindow.length; s++, w++) {
myWindow[w] = in[s] * cosineWindow[w];
}
// add the frame to the output queue
outputQueue.add(new DoubleData
(myWindow, sampleRate,
currentFirstSampleNumber));
currentFirstSampleNumber += windowShift;
}
return windowStart;
}
/**
* Returns the number of windows in the given array, given the windowSize and windowShift.
*
* @param arraySize the size of the array
* @param windowSize the window size
* @param windowShift the window shift
* @return the number of windows
*/
private static int getWindowCount(int arraySize, int windowSize,
int windowShift) {
if (arraySize < windowSize) {
return 0;
} else {
int windowCount = 1;
for (int windowEnd = windowSize;
windowEnd + windowShift <= arraySize;
windowEnd += windowShift) {
windowCount++;
}
return windowCount;
}
}
/**
* Returns the shift size used to window the incoming speech signal. This value might be used by other components to
* determine the time resolution of feature vectors.
* @return the shift of the window
*/
public float getWindowShiftInMs() {
if (windowShiftInMs == 0)
throw new RuntimeException(this + " was not initialized yet!");
return windowShiftInMs;
}
public int getSampleRate() {
return sampleRate;
}
/**
* Rounds a given sample-number to the number of samples will be processed by this instance including the padding
* samples at the end..
* @param samples samples to round to
* @return rounded result
*/
public long roundToFrames(long samples) {
int windowSize = DataUtil.getSamplesPerWindow(sampleRate, windowSizeInMs);
int windowShift = DataUtil.getSamplesPerShift(sampleRate, windowShiftInMs);
long mxNumShifts = samples / windowShift;
for (int i = (int) mxNumShifts; ; i--) {
long remainingSamples = samples - windowShift * i;
if (remainingSamples > windowSize)
return windowShift * (i + 1) + windowSize;
}
}
}
class DoubleBuffer {

    /** Fixed-capacity backing store; only the first {@code count} slots are considered occupied. */
    private final double[] storage;

    /** Number of occupied slots at the front of {@code storage}. */
    private int count;

    /**
     * Creates an empty buffer with the given fixed capacity.
     *
     * @param size the capacity, in elements
     */
    DoubleBuffer(int size) {
        this.storage = new double[size];
        this.count = 0;
    }

    /**
     * Reports how many elements are currently held.
     *
     * @return the current element count
     */
    public int getOccupancy() {
        return count;
    }

    /**
     * Gives direct access to the backing array (not a copy). Its length is the capacity, which may exceed
     * the occupancy.
     *
     * @return the backing array
     */
    public double[] getBuffer() {
        return storage;
    }

    /**
     * Appends every element of the given array.
     *
     * @param src the array to copy from
     * @return the element count after appending
     */
    public int appendAll(double[] src) {
        return append(src, 0, src.length);
    }

    /**
     * Appends {@code length} elements of {@code src}, starting at {@code srcPos}, behind the elements
     * already held.
     *
     * @param src    the array to copy from
     * @param srcPos index of the first element to copy
     * @param length how many elements to copy
     * @return the element count after appending
     * @throws Error if the elements would not fit into the remaining capacity
     */
    public int append(double[] src, int srcPos, int length) {
        final int newCount = count + length;
        if (newCount > storage.length) {
            throw new Error("RaisedCosineWindower: " +
                    "overflow-buffer: attempting to fill " +
                    "buffer beyond its capacity.");
        }

        // the count is only advanced once the copy has succeeded
        System.arraycopy(src, srcPos, storage, count, length);
        count = newCount;
        return newCount;
    }

    /**
     * Zero-fills the slots between the current occupancy and {@code windowSize}. The occupancy itself is
     * not changed; nothing happens if the buffer already holds at least {@code windowSize} elements.
     *
     * @param windowSize the window size
     */
    public void padWindow(int windowSize) {
        if (count >= windowSize) {
            return;
        }
        Arrays.fill(storage, count, windowSize, 0);
    }

    /** Marks the buffer as empty; the stored values are left in place. */
    public void reset() {
        count = 0;
    }
}