/**
* Copyright 2004-2010 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.display;
import java.awt.Color;
import java.awt.Graphics2D;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.swing.BoxLayout;
import javax.swing.JComboBox;
import javax.swing.JLabel;
import javax.swing.JPanel;
import javax.swing.JSlider;
import javax.swing.event.ChangeEvent;
import javax.swing.event.ChangeListener;
import marytts.signalproc.analysis.FrameBasedAnalyser;
import marytts.signalproc.analysis.ShortTermCepstrumAnalyser;
import marytts.signalproc.window.Window;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.math.MathUtils;
import marytts.util.string.PrintfFormat;
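/**
 * Displays a cepstrogram: the short-term cepstra of a signal, one grey-scale column per analysis frame, with quefrency
 * on the y axis (up to QUEF_MAX) and controls for the FFT size and the window type.
 *
 * @author Marc Schröder
 *
 */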
public class Cepstrogram extends FunctionGraph {
/** Window type used by the constructors that do not take an explicit window. */
public static final int DEFAULT_WINDOW = Window.HAMMING;
/** FFT size used by the constructors that do not take an explicit FFT size. */
public static final int DEFAULT_FFTSIZE = 1024;
/** Frame shift used by the constructors that do not take an explicit shift (divided by the sampling rate to get the x step). */
public static final int DEFAULT_WINDOWSHIFT = 32;
// Value range mapped onto the grey scale in drawCepstrum(); update() sets it to cepstra_max.
protected double dynamicRange; // dB below global maximum to show
protected static final double QUEF_MAX = 0.016; // 16 ms = upper limit quefrency to show
/** The signal to analyse, as passed to initialise(). */
protected double[] signal;
/** Sampling rate of {@link #signal}, as passed to initialise(). */
protected int samplingRate;
/** Analysis window handed to the cepstrum analyser; replaced when the controls change FFT size or window type. */
protected Window window;
/** Shift between successive analysis frames, handed to the cepstrum analyser. */
protected int windowShift;
/** FFT size handed to the cepstrum analyser. */
protected int fftSize;
/** One cepstrum per analysis frame, rebuilt by update(). */
protected List<double[]> cepstra;
/** Largest cepstrum value over all frames; NaN after update() if there were no values. */
protected double cepstra_max = 0.;
/** Smallest cepstrum value over all frames; NaN after update() if there were no values. */
protected double cepstra_min = 0.;
// NOTE(review): QUEF_MAX / deltaQ is used as an index and QUEF_MAX is 0.016 for "16 ms", so deltaQ is presumably in
// seconds rather than ms - confirm against ShortTermCepstrumAnalyser.getQuefrencyResolution().
protected double deltaQ = 0.; // distance in quefrency ms between two cepstrum samples
protected int cepstra_indexmax = 0; // index in each cepstrum corresponding to QUEF_MAX
/**
 * Creates a cepstrogram of the given audio with a size of DEFAULT_WIDTH x DEFAULT_HEIGHT.
 *
 * @param ais
 *            the audio to analyse; forwarded to the (ais, width, height) constructor
 */
public Cepstrogram(AudioInputStream ais) {
this(ais, DEFAULT_WIDTH, DEFAULT_HEIGHT);
}
/**
 * Creates a cepstrogram of the given audio using the default analysis parameters: a DEFAULT_WINDOW window of length
 * DEFAULT_FFTSIZE / 4 + 1, a shift of DEFAULT_WINDOWSHIFT and an FFT size of DEFAULT_FFTSIZE.
 *
 * @param ais
 *            the audio to analyse
 * @param width
 *            width forwarded to the full constructor
 * @param height
 *            height forwarded to the full constructor
 */
public Cepstrogram(AudioInputStream ais, int width, int height) {
this(ais, Window.get(DEFAULT_WINDOW, DEFAULT_FFTSIZE / 4 + 1), DEFAULT_WINDOWSHIFT, DEFAULT_FFTSIZE, width, height);
}
/**
 * Creates a cepstrogram of the given audio with explicit analysis parameters and a size of DEFAULT_WIDTH x
 * DEFAULT_HEIGHT.
 *
 * @param ais
 *            the audio to analyse
 * @param window
 *            the analysis window
 * @param windowShift
 *            the shift between successive analysis frames
 * @param fftSize
 *            the FFT size; the full constructor rejects values that are not a power of two
 */
public Cepstrogram(AudioInputStream ais, Window window, int windowShift, int fftSize) {
this(ais, window, windowShift, fftSize, DEFAULT_WIDTH, DEFAULT_HEIGHT);
}
/**
 * Creates a cepstrogram from an audio stream with explicit analysis parameters.
 * <p>
 * A stream that is not PCM_SIGNED is first converted through AudioSystem. The complete stream is then read into memory
 * and handed to {@code initialise}.
 *
 * @param ais
 *            the audio to analyse; must be mono
 * @param window
 *            the analysis window
 * @param windowShift
 *            the shift between successive analysis frames
 * @param fftSize
 *            the FFT size; must be a power of two
 * @param width
 *            width passed on to {@code initialise}
 * @param height
 *            height passed on to {@code initialise}
 * @throws IllegalArgumentException
 *             if the audio has more than one channel or fftSize is not a power of two
 */
public Cepstrogram(AudioInputStream ais, Window window, int windowShift, int fftSize, int width, int height) {
    super();
    AudioInputStream pcmStream = ais;
    AudioFormat.Encoding sourceEncoding = pcmStream.getFormat().getEncoding();
    if (!sourceEncoding.equals(AudioFormat.Encoding.PCM_SIGNED)) {
        pcmStream = AudioSystem.getAudioInputStream(AudioFormat.Encoding.PCM_SIGNED, pcmStream);
    }

    int channelCount = pcmStream.getFormat().getChannels();
    if (channelCount > 1) {
        throw new IllegalArgumentException("Can only deal with mono audio signals");
    }
    if (!MathUtils.isPowerOfTwo(fftSize)) {
        throw new IllegalArgumentException("fftSize must be a power of two");
    }

    AudioDoubleDataSource signalSource = new AudioDoubleDataSource(pcmStream);
    double[] samples = signalSource.getAllData();
    initialise(samples, signalSource.getSamplingRate(), window, windowShift, fftSize, width, height);
}
/**
 * Creates a cepstrogram of the given signal with a size of DEFAULT_WIDTH x DEFAULT_HEIGHT.
 *
 * @param signal
 *            the signal to analyse
 * @param samplingRate
 *            the sampling rate of the signal
 */
public Cepstrogram(double[] signal, int samplingRate) {
this(signal, samplingRate, DEFAULT_WIDTH, DEFAULT_HEIGHT);
}
/**
 * Creates a cepstrogram of the given signal using the default analysis parameters: a DEFAULT_WINDOW window of length
 * DEFAULT_FFTSIZE / 4 + 1, a shift of DEFAULT_WINDOWSHIFT and an FFT size of DEFAULT_FFTSIZE.
 *
 * @param signal
 *            the signal to analyse
 * @param samplingRate
 *            the sampling rate of the signal
 * @param width
 *            width forwarded to the full constructor
 * @param height
 *            height forwarded to the full constructor
 */
public Cepstrogram(double[] signal, int samplingRate, int width, int height) {
this(signal, samplingRate, Window.get(DEFAULT_WINDOW, DEFAULT_FFTSIZE / 4 + 1), DEFAULT_WINDOWSHIFT, DEFAULT_FFTSIZE,
width, height);
}
/**
 * Creates a cepstrogram of the given signal with explicit analysis parameters.
 * <p>
 * Unlike the AudioInputStream constructor, this constructor does not itself check that fftSize is a power of two; all
 * arguments are passed straight to initialise().
 *
 * @param signal
 *            the signal to analyse
 * @param samplingRate
 *            the sampling rate of the signal
 * @param window
 *            the analysis window
 * @param windowShift
 *            the shift between successive analysis frames
 * @param fftSize
 *            the FFT size
 * @param width
 *            width passed on to initialise()
 * @param height
 *            height passed on to initialise()
 */
public Cepstrogram(double[] signal, int samplingRate, Window window, int windowShift, int fftSize, int width, int height) {
initialise(signal, samplingRate, window, windowShift, fftSize, width, height);
}
/**
 * Stores the signal and the analysis parameters, initialises the underlying graph and computes the cepstra.
 * <p>
 * The graph is first initialised with a ten-element placeholder array; {@code update()} then replaces it with data
 * sized to the number of computed cepstra.
 *
 * @param aSignal
 *            the signal to analyse
 * @param aSamplingRate
 *            the sampling rate of the signal
 * @param aWindow
 *            the analysis window
 * @param aWindowShift
 *            the shift between successive analysis frames; divided by the sampling rate to obtain the x step
 * @param aFftSize
 *            the FFT size
 * @param width
 *            width passed to the superclass initialisation
 * @param height
 *            height passed to the superclass initialisation
 */
protected void initialise(double[] aSignal, int aSamplingRate, Window aWindow, int aWindowShift, int aFftSize, int width,
        int height) {
    signal = aSignal;
    samplingRate = aSamplingRate;
    window = aWindow;
    windowShift = aWindowShift;
    fftSize = aFftSize;

    double xStep = (double) aWindowShift / aSamplingRate;
    double[] placeholder = new double[10];
    super.initialise(width, height, 0, xStep, placeholder);

    update();
}
/**
 * Recomputes all cepstra from the current signal, window, window shift and FFT size, refreshes the value range and
 * axis limits, and repaints.
 * <p>
 * After this call {@code cepstra} holds one cepstrum per frame, {@code cepstra_min}/{@code cepstra_max} hold the
 * global extremes (NaN if there were no values), {@code ymax} is 0 and {@code ymin} is
 * {@code -cepstra_indexmax * deltaQ}.
 */
protected void update() {
    // NOTE(review): 8192 is presumably the inverse-FFT length (cf. getInverseFFTWindowLength()) - confirm.
    ShortTermCepstrumAnalyser analyser = new ShortTermCepstrumAnalyser(new BufferedDoubleDataSource(signal), fftSize, 8192,
            window, windowShift, samplingRate);
    cepstra = new ArrayList<double[]>();
    // Quefrency resolution of the cepstrum:
    deltaQ = analyser.getQuefrencyResolution();

    long tStart = System.currentTimeMillis();
    cepstra_max = Double.NaN;
    cepstra_min = Double.NaN;
    FrameBasedAnalyser.FrameAnalysisResult<double[]>[] frames = analyser.analyseAllFrames();
    for (FrameBasedAnalyser.FrameAnalysisResult<double[]> frame : frames) {
        double[] cepstrum = frame.get();
        cepstra.add(cepstrum);
        // Track the global extremes; NaN marks "no value seen yet".
        for (double value : cepstrum) {
            if (Double.isNaN(cepstra_min) || value < cepstra_min) {
                cepstra_min = value;
            }
            if (Double.isNaN(cepstra_max) || value > cepstra_max) {
                cepstra_max = value;
            }
        }
    }
    // Only the range from 0 up to the global maximum is displayed.
    dynamicRange = cepstra_max;
    long tEnd = System.currentTimeMillis();
    System.err.println("Computed " + cepstra.size() + " cepstra in " + (tEnd - tStart) + " ms.");

    // Highest cepstrum index to display: QUEF_MAX, limited to half the inverse FFT window length.
    int halfInverseLength = analyser.getInverseFFTWindowLength() / 2;
    cepstra_indexmax = Math.min((int) (QUEF_MAX / deltaQ), halfInverseLength);

    super.updateData(0, (double) windowShift / samplingRate, new double[cepstra.size()]);
    // correct y axis boundaries, for graph:
    ymax = 0.;
    ymin = -cepstra_indexmax * deltaQ;
    repaint();
}
/**
 * While painting the graph, draw the actual function data: one column per visible cepstrum.
 * <p>
 * The range of frames to draw is limited to the cepstra that actually exist, so a visible range extending beyond the
 * computed data (or a call before any cepstra were computed) draws nothing for the missing frames instead of failing.
 * The parameters {@code startY}, {@code data}, {@code currentGraphColor}, {@code currentGraphStyle} and
 * {@code currentDotStyle} are not used by this implementation.
 *
 * @param g
 *            the graphics2d object to paint in
 * @param image_fromX
 *            first visible X coordinate of the Graph display area (= after subtracting space reserved for Y axis)
 * @param image_toX
 *            last visible X coordinate of the Graph display area (= after subtracting space reserved for Y axis)
 * @param image_refX
 *            X coordinate of the origin, in the display area
 * @param image_refY
 *            Y coordinate of the origin, in the display area
 * @param startY
 *            the start position on the Y axis (= the lower bound of the drawing area)
 * @param image_height
 *            the height of the drawable region for the y values
 * @param data
 *            data
 * @param currentGraphColor
 *            current graph color
 * @param currentGraphStyle
 *            current graph style
 * @param currentDotStyle
 *            current dot style
 */
@Override
protected void drawData(Graphics2D g, int image_fromX, int image_toX, int image_refX, int image_refY, int startY,
        int image_height, double[] data, Color currentGraphColor, int currentGraphStyle, int currentDotStyle) {
    if (cepstra == null) {
        return; // nothing computed yet
    }
    // Never index outside the list of computed cepstra.
    int firstFrame = Math.max(imageX2indexX(image_fromX), 0);
    int endFrame = Math.min(imageX2indexX(image_toX), cepstra.size());

    // The width of one frame in pixels does not depend on the frame, so compute it once.
    int frameWidth = indexX2imageX(1);
    if (frameWidth == 0) {
        frameWidth = 1;
    }

    for (int i = firstFrame; i < endFrame; i++) {
        drawCepstrum(g, cepstra.get(i), image_refX + indexX2imageX(i), frameWidth, image_refY, image_height);
    }
}
/**
 * Draws one cepstrum as a vertical column of grey-scale rectangles, running downwards from {@code image_refY}.
 * <p>
 * The indices {@code 0 .. cepstra_indexmax - 1} are drawn. A value of {@code cepstra_max} is black; NaN values and
 * values below {@code cepstra_max - dynamicRange} are white; values in between are scaled linearly.
 *
 * @param g
 *            the graphics object to paint in
 * @param cepstrum
 *            the cepstrum values of one frame
 * @param image_X
 *            X coordinate of the left edge of the column
 * @param image_width
 *            requested column width; at least 2 pixels are drawn
 * @param image_refY
 *            Y coordinate at which index 0 is drawn
 * @param image_height
 *            height of the drawable region, spread over {@code cepstra_indexmax} cells
 */
protected void drawCepstrum(Graphics2D g, double[] cepstrum, int image_X, int image_width, int image_refY, int image_height) {
    final double pixelsPerBin = (double) image_height / cepstra_indexmax;
    // Cells are at least 2x2 pixels so that rounding leaves no gaps between neighbours.
    final int cellWidth = Math.max(image_width, 2);
    final int cellHeight = Math.max((int) Math.ceil(pixelsPerBin), 2);
    final double lowestShown = cepstra_max - dynamicRange;

    for (int bin = 0; bin < cepstra_indexmax; bin++) {
        final double value = cepstrum[bin];
        final boolean outsideRange = Double.isNaN(value) || value < lowestShown;
        final int grey = outsideRange ? 255 // white
                : (int) (255 * (cepstra_max - value) / dynamicRange);
        g.setColor(new Color(grey, grey, grey));
        g.fillRect(image_X, image_refY + (int) (bin * pixelsPerBin), cellWidth, cellHeight);
    }
}
/**
 * Builds the label for a position in the graph, of the form {@code E(x,y)=value}, where value is the cepstrum value
 * of the frame at x and the cepstrum index at y.
 * <p>
 * Both indices are limited to the available data, so positions on the very edge of the graph (for example the lower
 * boundary, where {@code Y2indexY} yields {@code cepstra_indexmax}) report the nearest cell instead of failing with an
 * index error. If there are no cepstra at all, the lookup still fails as before.
 *
 * @param x
 *            x position in graph coordinates
 * @param y
 *            y position in graph coordinates
 * @return the formatted label
 */
protected String getLabel(double x, double y) {
    // Show two digits more than the order of magnitude of the visible range, but never a negative precision.
    int precisionX = Math.max(0, -(int) (Math.log(getXRange()) / Math.log(10)) + 2);
    int frameIndex = Math.max(0, Math.min(X2indexX(x), cepstra.size() - 1));
    double[] cepstrum = cepstra.get(frameIndex);
    int precisionY = Math.max(0, -(int) (Math.log(getYRange()) / Math.log(10)) + 2);
    int binIndex = Math.max(0, Math.min(Y2indexY(y), cepstrum.length - 1));
    double E = cepstrum[binIndex];
    int precisionE = 1;
    return "E(" + new PrintfFormat("%." + precisionX + "f").sprintf(x) + ","
            + new PrintfFormat("%." + precisionY + "f").sprintf(y) + ")="
            + new PrintfFormat("%." + precisionE + "f").sprintf(E);
}
/**
 * Converts a Y pixel coordinate into a cepstrum index by first converting it to a y value and then mapping that value
 * with {@code Y2indexY}.
 *
 * @param imageY
 *            Y coordinate in the image
 * @return the corresponding cepstrum index
 */
protected int imageY2indexY(int imageY) {
    return Y2indexY(imageY2Y(imageY));
}
/**
 * Converts a y value of the graph into the corresponding cepstrum index.
 * <p>
 * {@code update()} sets the y axis to run from {@code ymax == 0} (cepstrum index 0) down to
 * {@code ymin == -cepstra_indexmax * deltaQ} (cepstrum index {@code cepstra_indexmax}), so the index is the fraction
 * of the way from 0 to {@code ymin}. Dividing by {@code ymax}, which is 0 here, would be a division by zero.
 *
 * @param y
 *            y value in graph coordinates, expected between {@code ymin} and 0
 * @return the cepstrum index for y; 0 if the y axis is empty ({@code ymin == 0})
 */
protected int Y2indexY(double y) {
    assert ymax == 0; // or we would have to write (ymax-ymin) or so below
    if (ymin == 0) {
        // Nothing to display (cepstra_indexmax or deltaQ is 0): avoid dividing by zero.
        return 0;
    }
    return (int) (cepstra_indexmax * y / ymin);
}
/**
 * Builds the control panel for this cepstrogram: a vertical slider selecting the FFT size as a power of two (2^5 to
 * 2^13) and a combo box selecting the window type.
 * <p>
 * Choosing a different FFT size replaces the window with one of the same type and length {@code fftSize / 4 + 1};
 * choosing a different window type replaces the window with one of the new type and the same length. In both cases
 * the cepstra are recomputed through {@code update()}.
 * <p>
 * The initial slider position is the exact integer base-2 logarithm of the current FFT size, limited to the slider
 * range so that an FFT size outside 2^5..2^13 does not make the slider construction fail.
 *
 * @return the panel containing the controls
 */
protected JPanel getControls() {
    JPanel controls = new JPanel();
    controls.setLayout(new BoxLayout(controls, BoxLayout.Y_AXIS));

    // FFT size slider:
    JLabel fftLabel = new JLabel("FFT size:");
    fftLabel.setAlignmentX(CENTER_ALIGNMENT);
    controls.add(fftLabel);
    int min = 5;
    int max = 13;
    // floor(log2(fftSize)) computed on integers: a truncated Math.log quotient can be off by one.
    int log2FftSize = 31 - Integer.numberOfLeadingZeros(this.fftSize);
    // JSlider rejects an initial value outside [min, max].
    int deflt = Math.max(min, Math.min(max, log2FftSize));
    JSlider fftSizeSlider = new JSlider(JSlider.VERTICAL, min, max, deflt);
    fftSizeSlider.setAlignmentX(CENTER_ALIGNMENT);
    fftSizeSlider.setMajorTickSpacing(1);
    fftSizeSlider.setPaintTicks(true);
    fftSizeSlider.setSnapToTicks(true);
    Hashtable<Integer, JLabel> labelTable = new Hashtable<Integer, JLabel>();
    for (int i = min; i <= max; i++) {
        int twoPowI = 1 << i; // 2^i, e.g. i==8 => twoPowI==256
        labelTable.put(Integer.valueOf(i), new JLabel(String.valueOf(twoPowI)));
    }
    fftSizeSlider.setLabelTable(labelTable);
    fftSizeSlider.setPaintLabels(true);
    fftSizeSlider.addChangeListener(new ChangeListener() {
        public void stateChanged(ChangeEvent ce) {
            JSlider source = (JSlider) ce.getSource();
            // Only react once the user has released the slider.
            if (!source.getValueIsAdjusting()) {
                int logfftSize = source.getValue();
                int newFftSize = 1 << logfftSize;
                if (newFftSize != Cepstrogram.this.fftSize) {
                    Cepstrogram.this.fftSize = newFftSize;
                    Cepstrogram.this.window = Window.get(Cepstrogram.this.window.type(), newFftSize / 4 + 1);
                    Cepstrogram.this.update();
                }
            }
        }
    });
    controls.add(fftSizeSlider);

    // Window type:
    JLabel windowTypeLabel = new JLabel("Window type:");
    windowTypeLabel.setAlignmentX(CENTER_ALIGNMENT);
    controls.add(windowTypeLabel);
    int[] windowTypes = Window.getAvailableTypes();
    // The combo box entries are length-1 windows; only their type is read back in the listener.
    Window[] windows = new Window[windowTypes.length];
    int selected = 0;
    for (int i = 0; i < windowTypes.length; i++) {
        windows[i] = Window.get(windowTypes[i], 1);
        if (windowTypes[i] == this.window.type()) {
            selected = i;
        }
    }
    JComboBox windowList = new JComboBox(windows);
    windowList.setAlignmentX(CENTER_ALIGNMENT);
    windowList.setSelectedIndex(selected);
    windowList.setMaximumSize(windowList.getPreferredSize());
    windowList.addActionListener(new ActionListener() {
        public void actionPerformed(ActionEvent e) {
            JComboBox cb = (JComboBox) e.getSource();
            int newWindowType = ((Window) cb.getSelectedItem()).type();
            if (newWindowType != Cepstrogram.this.window.type()) {
                Cepstrogram.this.window = Window.get(newWindowType, Cepstrogram.this.window.getLength());
                Cepstrogram.this.update();
            }
        }
    });
    controls.add(windowList);
    return controls;
}
/**
 * Shows a cepstrogram window for each audio file named on the command line.
 * <p>
 * The constructor reads the whole stream into memory, so each file's stream is closed as soon as the cepstrogram has
 * been built (or has failed to build) rather than being left open.
 *
 * @param args
 *            paths of the audio files to display
 * @throws Exception
 *             if a file cannot be opened or read as audio, or the cepstrogram cannot be created
 */
public static void main(String[] args) throws Exception {
    for (int i = 0; i < args.length; i++) {
        AudioInputStream ais = AudioSystem.getAudioInputStream(new File(args[i]));
        Cepstrogram signalSpectrum;
        try {
            signalSpectrum = new Cepstrogram(ais);
        } finally {
            ais.close();
        }
        signalSpectrum.showInJFrame(args[i], true, true);
    }
}
}