/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.util.data.audio;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
import javax.swing.JFileChooser;
import javax.swing.JOptionPane;
import marytts.signalproc.analysis.EnergyAnalyser;
import marytts.signalproc.analysis.EnergyAnalyser_dB;
import marytts.signalproc.analysis.F0TrackerAutocorrelationHeuristic;
import marytts.signalproc.analysis.FrameBasedAnalyser;
import marytts.signalproc.analysis.PitchFileHeader;
import marytts.signalproc.process.EnergyNormaliser;
import marytts.util.Pair;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.DoubleDataSource;
import marytts.util.io.FileUtils;
import marytts.util.math.MathUtils;
/**
* AudioConverterGUI.java
*
* Created on 24 June 2009, 10:04
*
* @author marc
*/
public class AudioConverterGUI extends javax.swing.JFrame {
/**
 * Builds the converter window. All widget creation, listener wiring and layout is delegated to
 * {@code initComponents()}.
 */
public AudioConverterGUI() {
    super();
    initComponents();
}
/**
* This method is called from within the constructor to initialize the form. WARNING: Do NOT modify this code. The content of
* this method is always regenerated by the Form Editor.
*/
// <editor-fold defaultstate="collapsed" desc=" Generated Code ">//GEN-BEGIN:initComponents
private void initComponents() {
// Builds the whole form: instantiates every widget, fills the "Processing options" panel (jPanel1, a
// GridBagLayout addressed by gridx/gridy), wires the button listeners, and finally lays out the frame's
// content pane with an org.jdesktop GroupLayout before packing the window.
java.awt.GridBagConstraints gridBagConstraints;
// --- Widget instantiation ---
buttonGroup1 = new javax.swing.ButtonGroup();
jLabel1 = new javax.swing.JLabel();
tfInputDir = new javax.swing.JTextField();
bBrowseInputDir = new javax.swing.JButton();
bBrowseOutputDir = new javax.swing.JButton();
tfOutputDir = new javax.swing.JTextField();
jLabel2 = new javax.swing.JLabel();
jPanel1 = new javax.swing.JPanel();
cbStereoMono = new javax.swing.JCheckBox();
jLabel3 = new javax.swing.JLabel();
cbDownsample = new javax.swing.JCheckBox();
comboSampleRate = new javax.swing.JComboBox();
cbPowerNormalise = new javax.swing.JCheckBox();
cbBestOnly = new javax.swing.JCheckBox();
cbHighPassFilter = new javax.swing.JCheckBox();
cbGlobalAmplitude = new javax.swing.JCheckBox();
jLabel5 = new javax.swing.JLabel();
comboMaxAmplitude = new javax.swing.JComboBox();
jLabel6 = new javax.swing.JLabel();
tfSoxPath = new javax.swing.JTextField();
bBrowseSoxPath = new javax.swing.JButton();
cbTrimSilences = new javax.swing.JCheckBox();
jPanel2 = new javax.swing.JPanel();
rbLeft = new javax.swing.JRadioButton();
rbRight = new javax.swing.JRadioButton();
rbBoth = new javax.swing.JRadioButton();
jLabel4 = new javax.swing.JLabel();
progressBar = new javax.swing.JProgressBar();
bRun = new javax.swing.JButton();
bQuit = new javax.swing.JButton();
// --- Frame setup and the input/output directory row widgets ---
setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
setTitle("Speech Synthesis Recordings Audio Converter");
jLabel1.setText("Input Wave directory:");
bBrowseInputDir.setText("Browse");
bBrowseInputDir.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
browseInputDirActionPerformed(evt);
}
});
bBrowseOutputDir.setText("Browse");
bBrowseOutputDir.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
browseOutputDirActionPerformed(evt);
}
});
jLabel2.setText("Output Wave directory:");
// --- "Processing options" panel; components are added out of visual order, gridy gives the row ---
jPanel1.setLayout(new java.awt.GridBagLayout());
jPanel1.setBorder(javax.swing.BorderFactory.createTitledBorder("Processing options"));
// Row 4: stereo-to-mono checkbox (unselected by default)
cbStereoMono.setText("Stereo to Mono conversion");
cbStereoMono.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
cbStereoMono.setMargin(new java.awt.Insets(0, 0, 0, 0));
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 4;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
gridBagConstraints.insets = new java.awt.Insets(10, 0, 0, 0);
jPanel1.add(cbStereoMono, gridBagConstraints);
// Row 5, column 0: label for the channel radio buttons (indented 25px under the checkbox)
jLabel3.setText("Input source channel:");
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 5;
gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
gridBagConstraints.insets = new java.awt.Insets(0, 25, 10, 5);
jPanel1.add(jLabel3, gridBagConstraints);
// Row 8: down-sampling checkbox (unselected by default)
cbDownsample.setText("Sampling rate conversion (down-sampling)");
cbDownsample.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
cbDownsample.setMargin(new java.awt.Insets(0, 0, 0, 0));
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 8;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
gridBagConstraints.insets = new java.awt.Insets(10, 0, 0, 0);
jPanel1.add(cbDownsample, gridBagConstraints);
// Row 9, column 1: target sample rate; editable, so the user may type any text here
comboSampleRate.setEditable(true);
comboSampleRate.setModel(new javax.swing.DefaultComboBoxModel(new String[] { "16000", "22050" }));
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 1;
gridBagConstraints.gridy = 9;
gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
jPanel1.add(comboSampleRate, gridBagConstraints);
// Row 3: power normalisation checkbox (unselected by default), indented 25px
cbPowerNormalise.setText("Power normalisation across recording sessions");
cbPowerNormalise
.setToolTipText("Cluster wav files by timestamp, and normalise cluster averages.\n\nWav files recorded with less than 10 minutes gap between them are treated like a single recording session. Only clusters as a whole are normalised, in order to even out different recording volumes in different recording sessions.\n");
cbPowerNormalise.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
cbPowerNormalise.setMargin(new java.awt.Insets(0, 0, 0, 0));
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 3;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
gridBagConstraints.insets = new java.awt.Insets(0, 25, 10, 0);
jPanel1.add(cbPowerNormalise, gridBagConstraints);
// Row 0: "best take only" checkbox (selected by default)
cbBestOnly.setSelected(true);
cbBestOnly.setText("Process only the best take of each sentence");
cbBestOnly.setToolTipText("Process a001.wav, but not a001a.wav, a001b.wav etc.");
cbBestOnly.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
cbBestOnly.setMargin(new java.awt.Insets(0, 0, 0, 0));
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 0;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
gridBagConstraints.insets = new java.awt.Insets(0, 0, 10, 0);
jPanel1.add(cbBestOnly, gridBagConstraints);
// Row 6: high-pass filter checkbox (selected by default)
cbHighPassFilter.setSelected(true);
cbHighPassFilter.setText("Remove low-frequency noise below 50 Hz");
cbHighPassFilter.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
cbHighPassFilter.setMargin(new java.awt.Insets(0, 0, 0, 0));
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 6;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
gridBagConstraints.insets = new java.awt.Insets(10, 0, 0, 0);
jPanel1.add(cbHighPassFilter, gridBagConstraints);
// Row 1: global amplitude scaling checkbox (selected by default)
cbGlobalAmplitude.setSelected(true);
cbGlobalAmplitude.setText("Global amplitude scaling");
cbGlobalAmplitude.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
cbGlobalAmplitude.setMargin(new java.awt.Insets(0, 0, 0, 0));
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 1;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
gridBagConstraints.insets = new java.awt.Insets(10, 0, 0, 0);
jPanel1.add(cbGlobalAmplitude, gridBagConstraints);
// Row 2, column 0: label for the target amplitude combo
jLabel5.setText("Target max. amplitude:");
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 2;
gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
gridBagConstraints.insets = new java.awt.Insets(0, 25, 0, 5);
jPanel1.add(jLabel5, gridBagConstraints);
// Row 2, column 1: target amplitude; editable, entries above 1.0 carry an explanatory suffix.
// Index 4 ("0.9") is preselected.
comboMaxAmplitude.setEditable(true);
comboMaxAmplitude.setModel(new javax.swing.DefaultComboBoxModel(new String[] { "0.5", "0.6", "0.7", "0.8", "0.9",
"1.0 (maximum)", "1.1 (causes clipping)", "1.2 (causes clipping)", "1.3 (causes clipping)",
"1.4 (causes clipping)", "1.5 (causes clipping)" }));
comboMaxAmplitude.setSelectedIndex(4);
comboMaxAmplitude.setMinimumSize(new java.awt.Dimension(70, 28));
comboMaxAmplitude.setPreferredSize(new java.awt.Dimension(70, 28));
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 1;
gridBagConstraints.gridy = 2;
gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
jPanel1.add(comboMaxAmplitude, gridBagConstraints);
// Row 10: sox binary location -- label (column 0), text field (column 1), browse button (column 2)
jLabel6.setText("Location of sox binary:");
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 10;
gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
gridBagConstraints.insets = new java.awt.Insets(0, 25, 10, 5);
jPanel1.add(jLabel6, gridBagConstraints);
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 1;
gridBagConstraints.gridy = 10;
gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
gridBagConstraints.insets = new java.awt.Insets(0, 0, 10, 0);
jPanel1.add(tfSoxPath, gridBagConstraints);
bBrowseSoxPath.setText("Browse");
bBrowseSoxPath.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
browseSoxActionPerformed(evt);
}
});
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 2;
gridBagConstraints.gridy = 10;
gridBagConstraints.insets = new java.awt.Insets(0, 0, 10, 0);
jPanel1.add(bBrowseSoxPath, gridBagConstraints);
// Row 7: silence trimming checkbox (selected by default)
cbTrimSilences.setSelected(true);
cbTrimSilences.setText("Trim initial and final silences");
cbTrimSilences.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
cbTrimSilences.setMargin(new java.awt.Insets(0, 0, 0, 0));
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 7;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
gridBagConstraints.insets = new java.awt.Insets(10, 0, 10, 0);
jPanel1.add(cbTrimSilences, gridBagConstraints);
// Row 5, column 1: channel selection radio buttons, mutually exclusive via buttonGroup1 and hosted in
// jPanel2; "left" is the default
buttonGroup1.add(rbLeft);
rbLeft.setSelected(true);
rbLeft.setText("left");
rbLeft.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
rbLeft.setMargin(new java.awt.Insets(0, 0, 0, 0));
jPanel2.add(rbLeft);
buttonGroup1.add(rbRight);
rbRight.setText("right");
rbRight.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
rbRight.setMargin(new java.awt.Insets(0, 0, 0, 0));
jPanel2.add(rbRight);
buttonGroup1.add(rbBoth);
rbBoth.setText("both");
rbBoth.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
rbBoth.setMargin(new java.awt.Insets(0, 0, 0, 0));
jPanel2.add(rbBoth);
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 1;
gridBagConstraints.gridy = 5;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
gridBagConstraints.insets = new java.awt.Insets(0, 0, 10, 0);
jPanel1.add(jPanel2, gridBagConstraints);
// Row 9, column 0: label for the sample rate combo
jLabel4.setText("Target sample rate (in Hz):");
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 9;
gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
gridBagConstraints.insets = new java.awt.Insets(0, 25, 0, 5);
jPanel1.add(jLabel4, gridBagConstraints);
// --- Run / Quit buttons ---
bRun.setText("Run");
bRun.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
runActionPerformed(evt);
}
});
bQuit.setText("Quit");
bQuit.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
bQuitActionPerformed(evt);
}
});
// --- Frame layout: directory rows on top, then the options panel, the progress bar and the buttons ---
org.jdesktop.layout.GroupLayout layout = new org.jdesktop.layout.GroupLayout(getContentPane());
getContentPane().setLayout(layout);
layout.setHorizontalGroup(layout
.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING)
.add(org.jdesktop.layout.GroupLayout.TRAILING,
layout.createSequentialGroup()
.addContainerGap()
.add(layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING).add(jLabel1)
.add(jLabel2))
.addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
.add(layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING)
.add(tfOutputDir, org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, 217, Short.MAX_VALUE)
.add(tfInputDir, org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, 217, Short.MAX_VALUE))
.addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
.add(layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING)
.add(org.jdesktop.layout.GroupLayout.TRAILING, bBrowseOutputDir)
.add(org.jdesktop.layout.GroupLayout.TRAILING, bBrowseInputDir)).addContainerGap())
.add(layout.createSequentialGroup().addContainerGap().add(jPanel1, 0, 465, Short.MAX_VALUE).addContainerGap())
.add(layout
.createSequentialGroup()
.add(80, 80, 80)
.add(layout
.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING)
.add(org.jdesktop.layout.GroupLayout.TRAILING, progressBar,
org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, 358, Short.MAX_VALUE)
.add(layout.createSequentialGroup().add(bRun)
.addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED, 208, Short.MAX_VALUE)
.add(bQuit))).add(67, 67, 67)));
layout.setVerticalGroup(layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING).add(
layout.createSequentialGroup()
.addContainerGap()
.add(layout
.createParallelGroup(org.jdesktop.layout.GroupLayout.TRAILING)
.add(layout.createSequentialGroup().add(bBrowseInputDir)
.addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED).add(bBrowseOutputDir))
.add(layout
.createSequentialGroup()
.add(layout
.createParallelGroup(org.jdesktop.layout.GroupLayout.BASELINE)
.add(jLabel1)
.add(tfInputDir, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE,
org.jdesktop.layout.GroupLayout.DEFAULT_SIZE,
org.jdesktop.layout.GroupLayout.PREFERRED_SIZE))
.addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
.add(layout
.createParallelGroup(org.jdesktop.layout.GroupLayout.BASELINE)
.add(jLabel2)
.add(tfOutputDir, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE,
org.jdesktop.layout.GroupLayout.DEFAULT_SIZE,
org.jdesktop.layout.GroupLayout.PREFERRED_SIZE))))
.addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
.add(jPanel1, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE,
org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE)
.addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
.add(progressBar, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE,
org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE)
.addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
.add(layout.createParallelGroup(org.jdesktop.layout.GroupLayout.BASELINE).add(bRun).add(bQuit))
.addContainerGap(org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)));
// Size the frame to fit its laid-out contents.
pack();
}// </editor-fold>//GEN-END:initComponents
/**
 * Handles the "Browse" button next to the sox location field: opens a file chooser restricted to plain
 * files and, if the user approves a selection, copies its absolute path into {@code tfSoxPath}.
 *
 * @param evt
 *            the button event (not inspected)
 */
private void browseSoxActionPerformed(java.awt.event.ActionEvent evt) {// GEN-FIRST:event_browseSoxActionPerformed
    JFileChooser chooser = new JFileChooser();
    chooser.setFileSelectionMode(JFileChooser.FILES_ONLY);
    if (chooser.showOpenDialog(this) != JFileChooser.APPROVE_OPTION) {
        return; // dialog dismissed without a choice: leave the text field as it is
    }
    File soxBinary = chooser.getSelectedFile();
    tfSoxPath.setText(soxBinary.getAbsolutePath());
}// GEN-LAST:event_browseSoxActionPerformed
/**
 * Handles the "Quit" button by terminating the JVM with exit status 0.
 *
 * @param evt
 *            the button event (not inspected)
 */
private void bQuitActionPerformed(java.awt.event.ActionEvent evt) {// GEN-FIRST:event_bQuitActionPerformed
    final int normalExit = 0;
    System.exit(normalExit);
}// GEN-LAST:event_bQuitActionPerformed
/**
 * Handles the "Browse" button of the output directory row: shows a directory-only chooser and writes the
 * absolute path of an approved selection into {@code tfOutputDir}.
 *
 * @param evt
 *            the button event (not inspected)
 */
private void browseOutputDirActionPerformed(java.awt.event.ActionEvent evt) {// GEN-FIRST:event_browseOutputDirActionPerformed
    JFileChooser dirChooser = new JFileChooser();
    dirChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
    boolean approved = dirChooser.showOpenDialog(this) == JFileChooser.APPROVE_OPTION;
    if (approved) {
        String chosenPath = dirChooser.getSelectedFile().getAbsolutePath();
        tfOutputDir.setText(chosenPath);
    }
}// GEN-LAST:event_browseOutputDirActionPerformed
/**
 * Handles the "Browse" button of the input directory row: shows a directory-only chooser and writes the
 * absolute path of an approved selection into {@code tfInputDir}.
 *
 * @param evt
 *            the button event (not inspected)
 */
private void browseInputDirActionPerformed(java.awt.event.ActionEvent evt) {// GEN-FIRST:event_browseInputDirActionPerformed
    JFileChooser dirChooser = new JFileChooser();
    dirChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
    switch (dirChooser.showOpenDialog(this)) {
    case JFileChooser.APPROVE_OPTION:
        File chosenDir = dirChooser.getSelectedFile();
        tfInputDir.setText(chosenDir.getAbsolutePath());
        break;
    default:
        // cancelled or error: keep whatever the field currently contains
        break;
    }
}// GEN-LAST:event_browseInputDirActionPerformed
/**
 * Handles the "Run" button: reads all settings from the form, validates them, and starts a
 * {@code Converter} thread when they are usable.
 * <p>
 * Validation failures are reported in a modal error dialog and abort the request. The two editable combo
 * boxes may contain arbitrary text, so their content is parsed defensively; a bad value is only reported if
 * the option that uses it (down-sampling or global amplitude scaling) is actually selected.
 *
 * @param evt
 *            the button event (not inspected)
 */
private void runActionPerformed(java.awt.event.ActionEvent evt) {// GEN-FIRST:event_runActionPerformed
    String inDirPath = tfInputDir.getText();
    if (inDirPath.trim().length() == 0) {
        JOptionPane.showMessageDialog(this, "Input directory not specified!", "Info missing",
                JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (!new File(inDirPath).isDirectory()) {
        JOptionPane.showMessageDialog(this, "Input directory does not exist:\n" + inDirPath, "Info missing",
                JOptionPane.ERROR_MESSAGE);
        return;
    }
    String outDirPath = tfOutputDir.getText();
    if (outDirPath.trim().length() == 0) {
        JOptionPane.showMessageDialog(this, "Output directory not specified!", "Info missing",
                JOptionPane.ERROR_MESSAGE);
        return;
    }
    boolean bestOnly = cbBestOnly.isSelected();
    boolean stereoMono = cbStereoMono.isSelected();
    int whichChannel = AudioPlayer.LEFT_ONLY;
    if (rbRight.isSelected()) {
        whichChannel = AudioPlayer.RIGHT_ONLY;
    } else if (rbBoth.isSelected()) {
        whichChannel = AudioPlayer.STEREO;
    }

    boolean downSample = cbDownsample.isSelected();
    int targetSampleRate;
    try {
        targetSampleRate = Integer.parseInt(String.valueOf(comboSampleRate.getSelectedItem()).trim());
    } catch (NumberFormatException nfe) {
        targetSampleRate = -1; // reported below, but only if down-sampling was requested
    }
    String soxPath = tfSoxPath.getText();
    if (downSample && !new File(soxPath).exists()) {
        JOptionPane.showMessageDialog(this, "Please indicate location of 'sox' tool\n"
                + "or deactivate sample rate conversion.", "Info missing", JOptionPane.ERROR_MESSAGE);
        return;
    }
    if (downSample && targetSampleRate <= 0) {
        JOptionPane.showMessageDialog(this, "Target sample rate must be a positive whole number (in Hz).",
                "Info missing", JOptionPane.ERROR_MESSAGE);
        return;
    }

    boolean highPassFilter = cbHighPassFilter.isSelected();
    boolean powerNormalise = cbPowerNormalise.isSelected();
    boolean maximiseAmplitude = cbGlobalAmplitude.isSelected();
    // Predefined entries look like "1.1 (causes clipping)": the number is the first blank-separated token.
    String amplitudeText = String.valueOf(comboMaxAmplitude.getSelectedItem()).trim();
    int firstBlank = amplitudeText.indexOf(' ');
    if (firstBlank >= 0) {
        amplitudeText = amplitudeText.substring(0, firstBlank);
    }
    double targetMaxAmplitude;
    try {
        targetMaxAmplitude = Double.parseDouble(amplitudeText);
    } catch (NumberFormatException nfe) {
        targetMaxAmplitude = Double.NaN; // reported below, but only if amplitude scaling was requested
    }
    if (maximiseAmplitude && !(targetMaxAmplitude > 0)) { // also rejects NaN
        JOptionPane.showMessageDialog(this, "Target max. amplitude must be a positive number.", "Info missing",
                JOptionPane.ERROR_MESSAGE);
        return;
    }
    boolean trimSilences = cbTrimSilences.isSelected();

    // Global amplitude scaling is a processing step of its own, so it counts as "something to do".
    if (!(stereoMono || downSample || highPassFilter || powerNormalise || maximiseAmplitude || trimSilences)) {
        JOptionPane.showMessageDialog(this, "Nothing to do!", "Info missing", JOptionPane.ERROR_MESSAGE);
        return;
    }

    // OK, we have a valid request
    progressBar.setStringPainted(true);
    try {
        new Converter(inDirPath, outDirPath, bestOnly, stereoMono, whichChannel, downSample, targetSampleRate, soxPath,
                highPassFilter, powerNormalise, maximiseAmplitude, targetMaxAmplitude, trimSilences).start();
    } catch (IOException ioe) {
        ioe.printStackTrace();
        progressBar.setStringPainted(false);
        JOptionPane.showMessageDialog(this, "Could not start conversion:\n" + ioe.getMessage(), "Error",
                JOptionPane.ERROR_MESSAGE);
    }
}// GEN-LAST:event_runActionPerformed
/**
 * Application entry point: schedules creation and display of the converter window on the AWT event
 * dispatch thread.
 *
 * @param args
 *            the command line arguments (ignored)
 */
public static void main(String args[]) {
    Runnable showWindow = new Runnable() {
        public void run() {
            AudioConverterGUI window = new AudioConverterGUI();
            window.setVisible(true);
        }
    };
    java.awt.EventQueue.invokeLater(showWindow);
}
// Variables declaration - do not modify//GEN-BEGIN:variables
// Push buttons: directory/sox browsing, Quit and Run.
private javax.swing.JButton bBrowseInputDir;
private javax.swing.JButton bBrowseOutputDir;
private javax.swing.JButton bBrowseSoxPath;
private javax.swing.JButton bQuit;
private javax.swing.JButton bRun;
// Groups rbLeft / rbRight / rbBoth so that only one channel choice is selected at a time.
private javax.swing.ButtonGroup buttonGroup1;
// Processing option checkboxes; each one is read in runActionPerformed.
private javax.swing.JCheckBox cbBestOnly;
private javax.swing.JCheckBox cbDownsample;
private javax.swing.JCheckBox cbGlobalAmplitude;
private javax.swing.JCheckBox cbHighPassFilter;
private javax.swing.JCheckBox cbPowerNormalise;
private javax.swing.JCheckBox cbStereoMono;
private javax.swing.JCheckBox cbTrimSilences;
// Editable combo boxes: target maximum amplitude and target sample rate.
private javax.swing.JComboBox comboMaxAmplitude;
private javax.swing.JComboBox comboSampleRate;
// Static captions.
private javax.swing.JLabel jLabel1;
private javax.swing.JLabel jLabel2;
private javax.swing.JLabel jLabel3;
private javax.swing.JLabel jLabel4;
private javax.swing.JLabel jLabel5;
private javax.swing.JLabel jLabel6;
// jPanel1 is the "Processing options" panel; jPanel2 hosts the channel radio buttons.
private javax.swing.JPanel jPanel1;
private javax.swing.JPanel jPanel2;
// Progress display, updated by the Converter thread.
private javax.swing.JProgressBar progressBar;
// Input channel selection for the stereo-to-mono conversion.
private javax.swing.JRadioButton rbBoth;
private javax.swing.JRadioButton rbLeft;
private javax.swing.JRadioButton rbRight;
// Text fields holding the input directory, output directory and sox binary paths.
private javax.swing.JTextField tfInputDir;
private javax.swing.JTextField tfOutputDir;
private javax.swing.JTextField tfSoxPath;
// End of variables declaration//GEN-END:variables
public class Converter extends Thread {
// Directory whose wav files are listed and converted by run().
private File inDir;
// Directory the converted files are written to, under their original file names.
private File outDir;
// Selects the files of inDir to process; built in the constructor.
private FilenameFilter filenameFilter;
// Copy of the constructor argument; the filter itself captures the argument directly.
private boolean bestOnly;
// Whether to pass each file through AudioConverterUtils.Stereo2Mono.
private boolean stereoMono;
// Channel selector handed to Stereo2Mono (the GUI passes one of the AudioPlayer channel constants).
private int channel;
// Whether to run the sox-based sample rate conversion on each output file.
private boolean downSample;
// Sample rate passed to sox via "-r"; only used when downSample is set.
private int targetSampleRate;
// Path of the sox executable used by samplingRateConverter.
private String soxPath;
// Whether to apply AudioConverterUtils.HighPassFilter (only applied to single-channel audio).
private boolean highPassFilter;
// Whether to equalise energy between recording sessions.
private boolean powerNormalise;
// Whether to scale all files so that the global peak reaches targetMaxAmplitude.
private boolean maximiseAmplitude;
// Desired global peak amplitude; only used when maximiseAmplitude is set.
private double targetMaxAmplitude;
// Whether to call trimSilences on each written output file.
private boolean trimSilences;
/**
 * Creates a conversion job. Nothing is converted until the thread is started.
 * <p>
 * The output directory is created if it does not exist yet. The file filter accepts names ending in
 * ".wav"; with {@code bestOnly} set, the character immediately before the extension must additionally be a
 * digit (so "a001.wav" is accepted but "a001a.wav" is not).
 *
 * @param inDirPath
 *            directory containing the wav files to convert
 * @param outDirPath
 *            directory to write converted files to
 * @param bestOnly
 *            restrict processing to files whose basename ends in a digit
 * @param stereoMono
 *            convert stereo input to mono
 * @param channel
 *            channel selector used for the stereo-to-mono conversion
 * @param downSample
 *            run sample rate conversion through sox
 * @param targetSampleRate
 *            sample rate for the sox conversion, in Hz
 * @param soxPath
 *            location of the sox executable
 * @param highPassFilter
 *            apply the high-pass filter to mono audio
 * @param powerNormalise
 *            equalise energy between recording sessions
 * @param maximiseAmplitude
 *            scale all files towards {@code targetMaxAmplitude}
 * @param targetMaxAmplitude
 *            desired global peak amplitude
 * @param trimSilences
 *            trim initial and final silences of each output file
 * @throws IOException
 *             if the output directory does not exist and cannot be created
 */
public Converter(String inDirPath, String outDirPath, final boolean bestOnly, boolean stereoMono, int channel,
        boolean downSample, int targetSampleRate, String soxPath, boolean highPassFilter, boolean powerNormalise,
        boolean maximiseAmplitude, double targetMaxAmplitude, boolean trimSilences) throws IOException {
    inDir = new File(inDirPath);
    outDir = new File(outDirPath);
    // Fail early instead of letting every single file write fail later in run().
    if (!outDir.isDirectory() && !outDir.mkdirs()) {
        throw new IOException("Cannot create output directory: " + outDir.getAbsolutePath());
    }
    filenameFilter = new FilenameFilter() {
        public boolean accept(File dir, String filename) {
            final String extension = ".wav";
            if (!filename.endsWith(extension)) {
                return false;
            }
            if (!bestOnly) {
                return true;
            }
            int lastBasenameIndex = filename.length() - extension.length() - 1;
            if (lastBasenameIndex < 0) {
                return false; // the file is literally ".wav": there is no basename to inspect
            }
            char lastCharInBasename = filename.charAt(lastBasenameIndex);
            return lastCharInBasename >= '0' && lastCharInBasename <= '9';
        }
    };
    this.bestOnly = bestOnly;
    this.stereoMono = stereoMono;
    this.channel = channel;
    this.downSample = downSample;
    this.targetSampleRate = targetSampleRate;
    this.soxPath = soxPath;
    this.highPassFilter = highPassFilter;
    this.powerNormalise = powerNormalise;
    this.maximiseAmplitude = maximiseAmplitude;
    this.targetMaxAmplitude = targetMaxAmplitude;
    this.trimSilences = trimSilences;
}
/**
 * Runs the conversion job in up to three passes over the selected wav files:
 * <ol>
 * <li>if power normalisation is on: sort files by modification time, split them into sessions wherever two
 * consecutive files are more than 10 minutes apart, and derive a per-session amplitude factor that lifts
 * each session to the energy of the loudest one;</li>
 * <li>if amplitude maximisation is on: find the global peak amplitude (after the factors from pass 1) and
 * multiply all factors so that the peak reaches the target amplitude;</li>
 * <li>convert every file (PCM_SIGNED, optional stereo-to-mono, optional high-pass filter, amplitude factor),
 * write it to the output directory, then optionally trim silences and down-sample it.</li>
 * </ol>
 * Progress is reported through the progress bar, each pass getting an equal share of the 0..100 range.
 * Failures are printed to stderr and reset the progress bar; they are not propagated.
 */
public void run() {
    int numSteps = 1; // the conversion pass itself
    int stepsComplete = 0;
    if (maximiseAmplitude) {
        numSteps++;
    }
    if (powerNormalise) {
        numSteps++;
    }
    int stepProgress = 100 / numSteps;
    try {
        // Writing into the input directory would delete each source file before it is read (see below).
        if (inDir.getCanonicalFile().equals(outDir.getCanonicalFile())) {
            throw new IOException("Input and output directory are the same (" + inDir.getAbsolutePath()
                    + "); refusing to overwrite the original recordings.");
        }
        File[] wavFiles = inDir.listFiles(filenameFilter);
        if (wavFiles == null) { // not a directory, or an I/O error occurred
            throw new IOException("Cannot list input directory: " + inDir.getAbsolutePath());
        }
        System.out.println("Number of wave files to convert: " + wavFiles.length);
        if (wavFiles.length == 0) {
            // The progress computations below divide by the number of files.
            System.out.println("No matching wave files found in " + inDir.getAbsolutePath()
                    + " -- nothing to convert.");
            return;
        }
        double[] amplitudeFactors = new double[wavFiles.length];
        Arrays.fill(amplitudeFactors, 1); // factor 1 = no change

        if (powerNormalise) {
            Arrays.sort(wavFiles, new Comparator<File>() {
                public int compare(File f1, File f2) {
                    long t1 = f1.lastModified();
                    long t2 = f2.lastModified();
                    if (t1 < t2)
                        return -1;
                    if (t1 > t2)
                        return 1;
                    return 0;
                }
            });
            List<Pair<Integer, Integer>> sessions = new ArrayList<Pair<Integer, Integer>>();
            List<Double> sessionEnergies = new ArrayList<Double>();
            double maxEnergy = Double.NEGATIVE_INFINITY;
            // each session pair is the index numbers delimiting the session in typical java fashion,
            // i.e. from the first element to one higher than the last element.
            // e.g., (3,6) includes wavFiles[3],wavFiles[4],wavFiles[5]
            int currentStart = 0;
            for (int i = 0; i < wavFiles.length - 1; i++) {
                long ti = wavFiles[i].lastModified();
                long ti1 = wavFiles[i + 1].lastModified();
                assert ti1 >= ti; // we sorted it, didn't we
                if (ti1 - ti > 600000) { // 600.000 ms = 10 min
                    sessions.add(new Pair<Integer, Integer>(currentStart, i + 1));
                    currentStart = i + 1;
                }
            }
            sessions.add(new Pair<Integer, Integer>(currentStart, wavFiles.length));
            for (int i = 0; i < sessions.size(); i++) {
                Pair<Integer, Integer> session = sessions.get(i);
                double avgEnergySession = computeAverageEnergy(wavFiles, session.getFirst(), session.getSecond(),
                        stepsComplete * stepProgress + session.getFirst() * stepProgress / wavFiles.length,
                        stepsComplete * stepProgress + session.getSecond() * stepProgress / wavFiles.length);
                System.out.printf(Locale.US, "Session at %tc: %d files, avg. Energy: %f\n",
                        wavFiles[session.getFirst()].lastModified(), session.getSecond() - session.getFirst(),
                        avgEnergySession);
                sessionEnergies.add(avgEnergySession);
                if (avgEnergySession > maxEnergy)
                    maxEnergy = avgEnergySession;
            }
            // Amplitude factors:
            // db1 = 10 * log10(A1^2), db2 = 10 * log10(A2^2)
            // => energy difference db2 - db1 corresponds to amplitude factor:
            // factor = sqrt(10^((db2 - db1)/10))
            for (int s = 0, max = sessions.size(); s < max; s++) {
                Pair<Integer, Integer> session = sessions.get(s);
                double energy = sessionEnergies.get(s);
                if (maxEnergy - energy < 1.e-15) { // energy == maxEnergy
                    continue;
                }
                double factor = Math.sqrt(Math.pow(10., (maxEnergy - energy) / 10.));
                System.out.println("Session " + s + " scaling factor: " + factor);
                for (int i = session.getFirst(); i < session.getSecond(); i++) {
                    amplitudeFactors[i] = factor;
                }
            }
            stepsComplete++;
        }

        // Find global maximum amplitude
        if (maximiseAmplitude) {
            double globalMaxAmplitude = 0.;
            int globalMaxIndex = -1;
            for (int i = 0; i < wavFiles.length; i++) {
                progressBar.setValue(stepsComplete * stepProgress + i * stepProgress / wavFiles.length);
                double maxAmplitude = getMaxAbsAmplitude(wavFiles[i]) * amplitudeFactors[i];
                if (maxAmplitude > globalMaxAmplitude) {
                    globalMaxAmplitude = maxAmplitude;
                    globalMaxIndex = i;
                }
            }
            if (globalMaxIndex < 0) {
                // No file has a peak above zero: scaling would divide by zero, so leave the factors alone.
                System.err.println("All files appear to be silent; skipping global amplitude scaling.");
            } else {
                System.out.println("Maximum amplitude of " + globalMaxAmplitude
                        + (powerNormalise ? " (after normalisation)" : "") + " found in file "
                        + wavFiles[globalMaxIndex].getName());
                System.out.println("Target maximum amplitude: " + targetMaxAmplitude);
                double scalingFactor = targetMaxAmplitude / globalMaxAmplitude;
                System.out.println("Applying scaling factor of " + scalingFactor + " to all files");
                for (int i = 0; i < wavFiles.length; i++) {
                    amplitudeFactors[i] *= scalingFactor;
                }
            }
            stepsComplete++;
        }

        for (int i = 0; i < wavFiles.length; i++) {
            progressBar.setValue(stepsComplete * stepProgress + i * stepProgress / wavFiles.length);
            String wavFileName = wavFiles[i].getName();
            System.out.println(wavFileName);
            File outFile = new File(outDir, wavFileName);
            if (outFile.exists() && !outFile.delete()) {
                throw new IOException("Cannot replace existing output file: " + outFile.getAbsolutePath());
            }
            AudioInputStream ais = AudioSystem.getAudioInputStream(wavFiles[i]);
            try {
                // Enforce PCM_SIGNED encoding
                if (!ais.getFormat().getEncoding().equals(AudioFormat.Encoding.PCM_SIGNED)) {
                    ais = AudioSystem.getAudioInputStream(AudioFormat.Encoding.PCM_SIGNED, ais);
                }
                if (stereoMono) {
                    ais = new AudioConverterUtils.Stereo2Mono(channel).apply(ais);
                }
                // If Audio is Mono then only remove Low Frequency Noise
                if (highPassFilter && ais.getFormat().getChannels() == 1) {
                    ais = new AudioConverterUtils.HighPassFilter(50, 40).apply(ais);
                }
                if (powerNormalise || maximiseAmplitude) {
                    double factor = amplitudeFactors[i];
                    if (factor != 1.) {
                        // EnergyNormaliser is given the squared amplitude factor.
                        ais = new EnergyNormaliser(factor * factor).apply(ais);
                    }
                }
                AudioSystem.write(ais, AudioFileFormat.Type.WAVE, outFile);
            } finally {
                ais.close(); // also on failure, so the file handle is not leaked
            }
            if (trimSilences) {
                trimSilences(outFile);
            }
            if (downSample) {
                samplingRateConverter(outFile.getAbsolutePath(), targetSampleRate);
            }
        }
        progressBar.setValue(100);
        System.out.println("Completed Audio Conversion successfully... Done.");
    } catch (ArrayIndexOutOfBoundsException e) {
        e.printStackTrace();
        progressBar.setValue(0);
        System.err
                .println("Audio conversion failed for ArrayIndexOutOfBoundsException. Probably this is due because the file lenght is not a multiple of 1024/2048 samples.");
    } catch (Exception e) {
        e.printStackTrace();
        progressBar.setValue(0);
        System.err.println("Audio conversion failed.");
    } finally {
        progressBar.setStringPainted(false);
    }
}
/**
 * Determines one bit shift for the whole set of files. For every file the highest bit position reached
 * by any sample magnitude is found (after the same PCM/mono/high-pass preprocessing as the main
 * conversion), and the per-file shift is that position minus {@code targetBitsPerSample} plus 2. The
 * largest per-file shift is returned; it is never below 0.
 *
 * @param wavFiles
 *            the files to inspect
 * @param targetBitsPerSample
 *            number of bits per sample aimed for
 * @param progressMax
 *            progress bar value to approach while iterating, starting from the bar's current value
 * @return globalBestShift, the largest per-file shift
 * @throws Exception
 *             if a file cannot be read or processed
 */
@Deprecated
// using volume scaling integrated with energy normalisation instead.
private int bestShiftBits(File[] wavFiles, int targetBitsPerSample, int progressMax) throws Exception {
    final int progressStart = progressBar.getValue();
    final int progressRange = progressMax - progressStart;
    int largestShift = 0;
    int fileIndex = 0;
    for (File wavFile : wavFiles) {
        progressBar.setValue(progressStart + fileIndex * progressRange / wavFiles.length);
        fileIndex++;

        AudioInputStream stream = AudioSystem.getAudioInputStream(wavFile);
        boolean isSignedPcm = stream.getFormat().getEncoding().equals(AudioFormat.Encoding.PCM_SIGNED);
        if (!isSignedPcm) {
            stream = AudioSystem.getAudioInputStream(AudioFormat.Encoding.PCM_SIGNED, stream);
        }
        if (stereoMono) {
            stream = new AudioConverterUtils.Stereo2Mono(channel).apply(stream);
        }
        // The low-frequency noise filter is applied to mono audio only
        if (highPassFilter && stream.getFormat().getChannels() == 1) {
            stream = new AudioConverterUtils.HighPassFilter(50, 40).apply(stream);
        }

        int[] sampleValues = AudioConverterUtils.getSamples(stream);
        int sampleBits = stream.getFormat().getSampleSizeInBits();
        int highestBit = 0;
        for (int sample : sampleValues) {
            int magnitude = Math.abs(sample);
            // walk down from the top bit until a shift leaves something non-zero
            int bit = sampleBits;
            while (bit >= 1 && (magnitude >> bit) == 0) {
                bit--;
            }
            if (bit >= 1 && highestBit < bit) {
                highestBit = bit;
            }
        }
        stream.close();

        int shiftForFile = highestBit - targetBitsPerSample + 2;
        if (shiftForFile > largestShift) {
            largestShift = shiftForFile;
        }
    }
    return largestShift;
}
/**
 * Converts the sampling rate of a wav file in place by running the external sox tool
 * ({@code sox <waveFile> -r <rate> <tempFile>}) and then replacing the original with the result.
 * <p>
 * The command is passed as an argument array, so paths containing blanks are handled. The intermediate
 * file is a uniquely named temporary file next to {@code waveFile} and is removed afterwards.
 *
 * @param waveFile
 *            path of the wav file to convert; overwritten with the converted audio
 * @param targetSamplingRate
 *            the sampling rate to convert to, in Hz
 * @throws IOException
 *             if sox cannot be started, exits with a non-zero status, the wait is interrupted, or the
 *             result cannot be moved into place
 */
private void samplingRateConverter(String waveFile, int targetSamplingRate) throws IOException {
    File target = new File(waveFile).getAbsoluteFile();
    // ".wav" suffix so that sox can infer the output format from the file name
    File tempOut = File.createTempFile("soxTmp", ".wav", target.getParentFile());
    try {
        ProcessBuilder soxCommand = new ProcessBuilder(soxPath, target.getPath(), "-r",
                String.valueOf(targetSamplingRate), tempOut.getPath());
        soxCommand.redirectErrorStream(true);
        Process process = soxCommand.start();
        process.getOutputStream().close(); // sox gets no input on stdin
        // Drain sox's output so it cannot block on a full pipe buffer.
        byte[] sink = new byte[1024];
        while (process.getInputStream().read(sink) != -1) {
            // discard
        }
        int exitStatus;
        try {
            exitStatus = process.waitFor();
        } catch (InterruptedException e) {
            process.destroy();
            Thread.currentThread().interrupt(); // preserve the interrupt for the caller
            throw new IOException("Interrupted while waiting for sox to convert " + waveFile);
        }
        if (exitStatus != 0) {
            throw new IOException("sox exited with status " + exitStatus + " while converting " + waveFile);
        }
        // renameTo may fail (e.g. because the target exists on some platforms); fall back to copying.
        if (!tempOut.renameTo(target)) {
            FileUtils.copy(tempOut.getAbsolutePath(), waveFile);
        }
    } finally {
        if (tempOut.exists()) {
            tempOut.delete();
        }
    }
}
/**
 * Computes a representative energy for the files {@code wavFiles[pos]} .. {@code wavFiles[end - 1]}: the
 * per-file energy is obtained from {@code computeAverageEnergy(File)} and the median of those values is
 * returned. While iterating, the progress bar is advanced linearly from {@code progressFrom} towards
 * {@code progressTo}.
 *
 * @param wavFiles
 *            all files of the job
 * @param pos
 *            index of the first file to include
 * @param end
 *            index one past the last file to include
 * @param progressFrom
 *            progress bar value shown for the first file
 * @param progressTo
 *            progress bar value the range ends at
 * @return the median of the per-file energies
 * @throws UnsupportedAudioFileException
 *             if a file is not in a supported audio format
 * @throws IOException
 *             if a file cannot be read
 */
private double computeAverageEnergy(File[] wavFiles, int pos, int end, int progressFrom, int progressTo)
        throws UnsupportedAudioFileException, IOException {
    final int count = end - pos;
    final int progressSpan = progressTo - progressFrom;
    final double[] perFileEnergy = new double[count];
    int offset = 0;
    while (offset < count) {
        progressBar.setValue(progressFrom + offset * progressSpan / count);
        perFileEnergy[offset] = computeAverageEnergy(wavFiles[pos + offset]);
        offset++;
    }
    return MathUtils.median(perFileEnergy);
}
/**
 * Computes a per-sample energy figure for the voiced speech portion of a single wave file.
 * <p>
 * A frame is taken into account only if it starts inside one of the speech stretches reported by the energy
 * analyser and the F0 tracker yields a value above 10 for it. The median energy of those frames is divided by the
 * frame length in samples. Note that, despite the method name, the result is based on a median, not a mean.
 *
 * @param wavFile
 *            the audio file to analyse
 * @return the median energy of the voiced speech frames divided by the frame length in samples, or 0 if no such
 *         frame was found
 * @throws UnsupportedAudioFileException
 *             if the file cannot be read as audio
 * @throws IOException
 *             if reading the file fails
 */
private double computeAverageEnergy(File wavFile) throws UnsupportedAudioFileException, IOException {
    int sampleRate;
    double[] audioData;
    AudioInputStream ais = AudioSystem.getAudioInputStream(wavFile);
    try {
        sampleRate = (int) ais.getFormat().getSampleRate();
        audioData = new AudioDoubleDataSource(ais).getAllData();
    } finally {
        // All samples are in memory now (or reading failed); release the file handle either way.
        ais.close();
    }
    int framelength = (int) (0.01 /* seconds */* sampleRate);
    DoubleDataSource audio = new BufferedDoubleDataSource(audioData);
    EnergyAnalyser energyAnalyser = new EnergyAnalyser_dB(audio, framelength, sampleRate);

    // Run the F0 tracker with the same framing as the energy analyser so that frame indices line up.
    PitchFileHeader f0TrackerParams = new PitchFileHeader();
    f0TrackerParams.windowSizeInSeconds = energyAnalyser.getFrameLengthTime();
    f0TrackerParams.skipSizeInSeconds = energyAnalyser.getFrameShiftTime();
    f0TrackerParams.fs = sampleRate;
    F0TrackerAutocorrelationHeuristic f0Tracker = new F0TrackerAutocorrelationHeuristic(f0TrackerParams);
    // A fresh data source over the same samples: the first one was handed to the energy analyser.
    audio = new BufferedDoubleDataSource(audioData);
    f0Tracker.pitchAnalyze(audio);
    double[] f0Contour = f0Tracker.getF0Contour();

    // Now determine speech stretches, and compute average frame energy within those
    int energyBufferLength = 20;
    double speechStartLikelihood = 0.1; // count as speech if at least 10% of recent frames are above threshold
    double speechEndLikelihood = 0.1; // count as non-speech if up to 10% of recent frames are below threshold
    double shiftFromMinimumEnergyCenter = 0.1; // threshold is at 10% of the distance between smallest and biggest cluster
    int numClusters = 4;
    double[][] speechStretches = energyAnalyser.getSpeechStretchesUsingEnergyHistory(energyBufferLength,
            speechStartLikelihood, speechEndLikelihood, shiftFromMinimumEnergyCenter, numClusters);
    FrameBasedAnalyser.FrameAnalysisResult<Double>[] energies = energyAnalyser.analyseAllFrames();

    int numSpeechFrames = 0;
    int numFrames = Math.min(energies.length, f0Contour.length);
    // an upper bound for the number of speech frames is numFrames:
    double[] speechFrameEnergies = new double[numFrames];
    int iCurrentFrame = 0;
    for (int i = 0; i < speechStretches.length; i++) {
        // Skip the frames that start before the current speech stretch begins.
        while (iCurrentFrame < numFrames && energies[iCurrentFrame].getStartTime() < speechStretches[i][0]) {
            iCurrentFrame++;
        }
        while (iCurrentFrame < numFrames && energies[iCurrentFrame].getStartTime() < speechStretches[i][1]) {
            // Current frame is speech frame
            // If it is also voiced, use its energy:
            if (f0Contour[iCurrentFrame] > 10) { // voiced
                speechFrameEnergies[numSpeechFrames] = energies[iCurrentFrame].get();
                numSpeechFrames++;
            }
            iCurrentFrame++;
        }
    }
    if (numSpeechFrames == 0) {
        System.err.println("No speech found in file " + wavFile.getAbsolutePath());
        return 0;
    }
    // Only the first numSpeechFrames entries of the buffer are filled.
    double medianSpeechFrameEnergy = MathUtils.median(speechFrameEnergies, 0, numSpeechFrames);
    double medianSpeechSampleEnergy = medianSpeechFrameEnergy / energyAnalyser.getFrameLengthSamples();
    return medianSpeechSampleEnergy;
}
/**
 * Determines the largest absolute sample value in a wave file.
 *
 * @param wavFile
 *            the audio file to scan
 * @return the maximum of the absolute values of all samples, or 0 if the file contains no samples
 * @throws UnsupportedAudioFileException
 *             if the file cannot be read as audio
 * @throws IOException
 *             if reading the file fails
 */
private double getMaxAbsAmplitude(File wavFile) throws UnsupportedAudioFileException, IOException {
    double[] audioData;
    AudioInputStream ais = AudioSystem.getAudioInputStream(wavFile);
    try {
        audioData = new AudioDoubleDataSource(ais).getAllData();
    } finally {
        // Release the file handle even if reading the samples fails.
        ais.close();
    }
    double maxAmplitude = 0.;
    for (double sample : audioData) {
        double magnitude = Math.abs(sample);
        if (magnitude > maxAmplitude) {
            maxAmplitude = magnitude;
        }
    }
    return maxAmplitude;
}
/**
 * Runs <code>AudioConverterUtils.removeEndpoints</code> on a wave file with fixed settings and replaces the file
 * with the result.
 * <p>
 * The result is first written to <code>tmpAudio.wav</code> (relative to the current working directory) and then
 * moved over <code>wavFile</code>; if the rename fails, the content is copied instead.
 *
 * @param wavFile
 *            the audio file to process; it is overwritten with the result
 * @throws UnsupportedAudioFileException
 *             declared for the endpoint removal step
 * @throws IOException
 *             declared for the endpoint removal and copy steps
 */
private void trimSilences(File wavFile) throws UnsupportedAudioFileException, IOException {
    // These settings are fixed on purpose; marytts.tools.voiceimport.EndpointDetector is the place to tune them.
    final int historyFrames = 20;
    final double startLikelihood = 0.1;
    final double endLikelihood = 0.1;
    final double energyCenterShift = 0.0;
    final int clusterCount = 4;
    final double minStartSilenceSeconds = 0.5;
    final double minEndSilenceSeconds = 0.5;

    final File trimmedFile = new File("tmpAudio.wav");
    final String sourcePath = wavFile.getAbsolutePath();
    final String trimmedPath = trimmedFile.getAbsolutePath();
    AudioConverterUtils.removeEndpoints(sourcePath, trimmedPath, historyFrames, startLikelihood, endLikelihood,
            energyCenterShift, clusterCount, minStartSilenceSeconds, minEndSilenceSeconds);

    final boolean moved = trimmedFile.renameTo(wavFile);
    if (moved) {
        return;
    }
    // Rename did not succeed: overwrite the original by copying the temporary file's content.
    FileUtils.copy(trimmedPath, sourcePath);
}
}
}