/** * Copyright 2007 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.util.data.audio; import java.io.File; import java.io.FilenameFilter; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.List; import java.util.Locale; import javax.sound.sampled.AudioFileFormat; import javax.sound.sampled.AudioFormat; import javax.sound.sampled.AudioInputStream; import javax.sound.sampled.AudioSystem; import javax.sound.sampled.UnsupportedAudioFileException; import javax.swing.JFileChooser; import javax.swing.JOptionPane; import marytts.signalproc.analysis.EnergyAnalyser; import marytts.signalproc.analysis.EnergyAnalyser_dB; import marytts.signalproc.analysis.F0TrackerAutocorrelationHeuristic; import marytts.signalproc.analysis.FrameBasedAnalyser; import marytts.signalproc.analysis.PitchFileHeader; import marytts.signalproc.process.EnergyNormaliser; import marytts.util.Pair; import marytts.util.data.BufferedDoubleDataSource; import marytts.util.data.DoubleDataSource; import marytts.util.io.FileUtils; import marytts.util.math.MathUtils; /** * AudioConverterGUI.java * * Created on 24. 
June 2009, 10:04
 *
 * Swing window that collects the settings for a batch conversion of wav files (input and output directory plus a set of
 * processing options) and hands them to a background {@code Converter} thread when "Run" is pressed.
 *
 * @author marc
 */
public class AudioConverterGUI extends javax.swing.JFrame {

    /** Creates new form AudioConverterGUI */
    public AudioConverterGUI() {
        initComponents();
    }

    /**
     * This method is called from within the constructor to initialize the form. WARNING: Do NOT modify this code. The content of
     * this method is always regenerated by the Form Editor.
     */
    // <editor-fold defaultstate="collapsed" desc=" Generated Code ">//GEN-BEGIN:initComponents
    private void initComponents() {
        java.awt.GridBagConstraints gridBagConstraints;

        // Instantiate every widget of the form.
        buttonGroup1 = new javax.swing.ButtonGroup();
        jLabel1 = new javax.swing.JLabel();
        tfInputDir = new javax.swing.JTextField();
        bBrowseInputDir = new javax.swing.JButton();
        bBrowseOutputDir = new javax.swing.JButton();
        tfOutputDir = new javax.swing.JTextField();
        jLabel2 = new javax.swing.JLabel();
        jPanel1 = new javax.swing.JPanel();
        cbStereoMono = new javax.swing.JCheckBox();
        jLabel3 = new javax.swing.JLabel();
        cbDownsample = new javax.swing.JCheckBox();
        comboSampleRate = new javax.swing.JComboBox();
        cbPowerNormalise = new javax.swing.JCheckBox();
        cbBestOnly = new javax.swing.JCheckBox();
        cbHighPassFilter = new javax.swing.JCheckBox();
        cbGlobalAmplitude = new javax.swing.JCheckBox();
        jLabel5 = new javax.swing.JLabel();
        comboMaxAmplitude = new javax.swing.JComboBox();
        jLabel6 = new javax.swing.JLabel();
        tfSoxPath = new javax.swing.JTextField();
        bBrowseSoxPath = new javax.swing.JButton();
        cbTrimSilences = new javax.swing.JCheckBox();
        jPanel2 = new javax.swing.JPanel();
        rbLeft = new javax.swing.JRadioButton();
        rbRight = new javax.swing.JRadioButton();
        rbBoth = new javax.swing.JRadioButton();
        jLabel4 = new javax.swing.JLabel();
        progressBar = new javax.swing.JProgressBar();
        bRun = new javax.swing.JButton();
        bQuit = new javax.swing.JButton();

        // Closing the window exits the JVM.
        setDefaultCloseOperation(javax.swing.WindowConstants.EXIT_ON_CLOSE);
        setTitle("Speech Synthesis Recordings Audio Converter");
        jLabel1.setText("Input Wave directory:");

        bBrowseInputDir.setText("Browse");
        bBrowseInputDir.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                browseInputDirActionPerformed(evt);
            }
        });

        bBrowseOutputDir.setText("Browse");
        bBrowseOutputDir.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                browseOutputDirActionPerformed(evt);
            }
        });

        jLabel2.setText("Output Wave directory:");

        // "Processing options" panel: a GridBagLayout whose rows are addressed through gridy below.
        jPanel1.setLayout(new java.awt.GridBagLayout());

        jPanel1.setBorder(javax.swing.BorderFactory.createTitledBorder("Processing options"));

        // Grid row 4: stereo-to-mono check box (not selected initially).
        cbStereoMono.setText("Stereo to Mono conversion");
        cbStereoMono.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
        cbStereoMono.setMargin(new java.awt.Insets(0, 0, 0, 0));
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 4;
        gridBagConstraints.gridwidth = 2;
        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
        gridBagConstraints.insets = new java.awt.Insets(10, 0, 0, 0);
        jPanel1.add(cbStereoMono, gridBagConstraints);

        // Grid row 5, column 0: label for the channel radio buttons (the buttons themselves live in jPanel2).
        jLabel3.setText("Input source channel:");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 5;
        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
        gridBagConstraints.insets = new java.awt.Insets(0, 25, 10, 5);
        jPanel1.add(jLabel3, gridBagConstraints);

        // Grid row 8: down-sampling check box (not selected initially).
        cbDownsample.setText("Sampling rate conversion (down-sampling)");
        cbDownsample.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
        cbDownsample.setMargin(new java.awt.Insets(0, 0, 0, 0));
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 8;
        gridBagConstraints.gridwidth = 2;
        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
        gridBagConstraints.insets = new java.awt.Insets(10, 0, 0, 0);
        jPanel1.add(cbDownsample, gridBagConstraints);

        // Grid row 9, column 1: editable combo box, so the user may type a rate that is not in the preset list.
        comboSampleRate.setEditable(true);
        comboSampleRate.setModel(new javax.swing.DefaultComboBoxModel(new String[] { "16000", "22050" }));
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 1;
        gridBagConstraints.gridy = 9;
        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
        jPanel1.add(comboSampleRate, gridBagConstraints);

        // Grid row 3: power normalisation check box (not selected initially).
        cbPowerNormalise.setText("Power normalisation across recording sessions");
        cbPowerNormalise
                .setToolTipText("Cluster wav files by timestamp, and normalise cluster averages.\n\nWav files recorded with less than 10 minutes gap between them are treated like a single recording session. Only clusters as a whole are normalised, in order to even out different recording volumes in different recording sessions.\n");
        cbPowerNormalise.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
        cbPowerNormalise.setMargin(new java.awt.Insets(0, 0, 0, 0));
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 3;
        gridBagConstraints.gridwidth = 2;
        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
        gridBagConstraints.insets = new java.awt.Insets(0, 25, 10, 0);
        jPanel1.add(cbPowerNormalise, gridBagConstraints);

        // Grid row 0: "best take only" check box, selected by default.
        cbBestOnly.setSelected(true);
        cbBestOnly.setText("Process only the best take of each sentence");
        cbBestOnly.setToolTipText("Process a001.wav, but not a001a.wav, a001b.wav etc.");
        cbBestOnly.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
        cbBestOnly.setMargin(new java.awt.Insets(0, 0, 0, 0));
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 0;
        gridBagConstraints.gridwidth = 2;
        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
        gridBagConstraints.insets = new java.awt.Insets(0, 0, 10, 0);
        jPanel1.add(cbBestOnly, gridBagConstraints);

        // Grid row 6: high-pass filter check box, selected by default.
        cbHighPassFilter.setSelected(true);
        cbHighPassFilter.setText("Remove low-frequency noise below 50 Hz");
        cbHighPassFilter.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
        cbHighPassFilter.setMargin(new java.awt.Insets(0, 0, 0, 0));
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 6;
        gridBagConstraints.gridwidth = 2;
        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
        gridBagConstraints.insets = new java.awt.Insets(10, 0, 0, 0);
        jPanel1.add(cbHighPassFilter, gridBagConstraints);

        // Grid row 1: global amplitude scaling check box, selected by default.
        cbGlobalAmplitude.setSelected(true);
        cbGlobalAmplitude.setText("Global amplitude scaling");
        cbGlobalAmplitude.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
        cbGlobalAmplitude.setMargin(new java.awt.Insets(0, 0, 0, 0));
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 1;
        gridBagConstraints.gridwidth = 2;
        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
        gridBagConstraints.insets = new java.awt.Insets(10, 0, 0, 0);
        jPanel1.add(cbGlobalAmplitude, gridBagConstraints);

        // Grid row 2: label and editable combo box for the target maximum amplitude.
        jLabel5.setText("Target max. amplitude:");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 2;
        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
        gridBagConstraints.insets = new java.awt.Insets(0, 25, 0, 5);
        jPanel1.add(jLabel5, gridBagConstraints);

        // Preset entries carry an explanatory suffix after the number; index 4 ("0.9") is preselected.
        comboMaxAmplitude.setEditable(true);
        comboMaxAmplitude.setModel(new javax.swing.DefaultComboBoxModel(new String[] { "0.5", "0.6", "0.7", "0.8", "0.9",
                "1.0 (maximum)", "1.1 (causes clipping)", "1.2 (causes clipping)", "1.3 (causes clipping)",
                "1.4 (causes clipping)", "1.5 (causes clipping)" }));
        comboMaxAmplitude.setSelectedIndex(4);
        comboMaxAmplitude.setMinimumSize(new java.awt.Dimension(70, 28));
        comboMaxAmplitude.setPreferredSize(new java.awt.Dimension(70, 28));
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 1;
        gridBagConstraints.gridy = 2;
        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
        jPanel1.add(comboMaxAmplitude, gridBagConstraints);

        // Grid row 10: label, text field and browse button for the sox executable.
        jLabel6.setText("Location of sox binary:");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 10;
        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
        gridBagConstraints.insets = new java.awt.Insets(0, 25, 10, 5);
        jPanel1.add(jLabel6, gridBagConstraints);

        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 1;
        gridBagConstraints.gridy = 10;
        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
        gridBagConstraints.insets = new java.awt.Insets(0, 0, 10, 0);
        jPanel1.add(tfSoxPath, gridBagConstraints);

        bBrowseSoxPath.setText("Browse");
        bBrowseSoxPath.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                browseSoxActionPerformed(evt);
            }
        });

        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 2;
        gridBagConstraints.gridy = 10;
        gridBagConstraints.insets = new java.awt.Insets(0, 0, 10, 0);
        jPanel1.add(bBrowseSoxPath, gridBagConstraints);

        // Grid row 7: silence trimming check box, selected by default.
        cbTrimSilences.setSelected(true);
        cbTrimSilences.setText("Trim initial and final silences");
        cbTrimSilences.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
        cbTrimSilences.setMargin(new java.awt.Insets(0, 0, 0, 0));
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 7;
        gridBagConstraints.gridwidth = 2;
        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
        gridBagConstraints.insets = new java.awt.Insets(10, 0, 10, 0);
        jPanel1.add(cbTrimSilences, gridBagConstraints);

        // Channel selection: three radio buttons sharing one ButtonGroup; "left" is preselected.
        buttonGroup1.add(rbLeft);
        rbLeft.setSelected(true);
        rbLeft.setText("left");
        rbLeft.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
        rbLeft.setMargin(new java.awt.Insets(0, 0, 0, 0));
        jPanel2.add(rbLeft);

        buttonGroup1.add(rbRight);
        rbRight.setText("right");
        rbRight.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
        rbRight.setMargin(new java.awt.Insets(0, 0, 0, 0));
        jPanel2.add(rbRight);

        buttonGroup1.add(rbBoth);
        rbBoth.setText("both");
        rbBoth.setBorder(javax.swing.BorderFactory.createEmptyBorder(0, 0, 0, 0));
        rbBoth.setMargin(new java.awt.Insets(0, 0, 0, 0));
        jPanel2.add(rbBoth);

        // Grid row 5, column 1: the radio button panel, next to its label.
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 1;
        gridBagConstraints.gridy = 5;
        gridBagConstraints.gridwidth = 2;
        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
        gridBagConstraints.insets = new java.awt.Insets(0, 0, 10, 0);
        jPanel1.add(jPanel2, gridBagConstraints);

        // Grid row 9, column 0: label for the sample rate combo box.
        jLabel4.setText("Target sample rate (in Hz):");
        gridBagConstraints = new java.awt.GridBagConstraints();
        gridBagConstraints.gridx = 0;
        gridBagConstraints.gridy = 9;
        gridBagConstraints.fill = java.awt.GridBagConstraints.HORIZONTAL;
        gridBagConstraints.anchor = java.awt.GridBagConstraints.WEST;
        gridBagConstraints.insets = new java.awt.Insets(0, 25, 0, 5);
        jPanel1.add(jLabel4, gridBagConstraints);

        bRun.setText("Run");
        bRun.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                runActionPerformed(evt);
            }
        });

        bQuit.setText("Quit");
        bQuit.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                bQuitActionPerformed(evt);
            }
        });

        // Frame-level layout (org.jdesktop GroupLayout): directory rows, then the options panel, the progress bar
        // and the Run/Quit buttons. The horizontal and vertical groups must list the same components.
        org.jdesktop.layout.GroupLayout layout = new org.jdesktop.layout.GroupLayout(getContentPane());
        getContentPane().setLayout(layout);
        layout.setHorizontalGroup(layout
                .createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING)
                .add(org.jdesktop.layout.GroupLayout.TRAILING,
                        layout.createSequentialGroup()
                                .addContainerGap()
                                .add(layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING).add(jLabel1)
                                        .add(jLabel2))
                                .addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
                                .add(layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING)
                                        .add(tfOutputDir, org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, 217, Short.MAX_VALUE)
                                        .add(tfInputDir, org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, 217, Short.MAX_VALUE))
                                .addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
                                .add(layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING)
                                        .add(org.jdesktop.layout.GroupLayout.TRAILING, bBrowseOutputDir)
                                        .add(org.jdesktop.layout.GroupLayout.TRAILING, bBrowseInputDir)).addContainerGap())
                .add(layout.createSequentialGroup().addContainerGap().add(jPanel1, 0, 465, Short.MAX_VALUE).addContainerGap())
                .add(layout
                        .createSequentialGroup()
                        .add(80, 80, 80)
                        .add(layout
                                .createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING)
                                .add(org.jdesktop.layout.GroupLayout.TRAILING, progressBar,
                                        org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, 358, Short.MAX_VALUE)
                                .add(layout.createSequentialGroup().add(bRun)
                                        .addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED, 208, Short.MAX_VALUE)
                                        .add(bQuit))).add(67, 67, 67)));
        layout.setVerticalGroup(layout.createParallelGroup(org.jdesktop.layout.GroupLayout.LEADING).add(
                layout.createSequentialGroup()
                        .addContainerGap()
                        .add(layout
                                .createParallelGroup(org.jdesktop.layout.GroupLayout.TRAILING)
                                .add(layout.createSequentialGroup().add(bBrowseInputDir)
                                        .addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED).add(bBrowseOutputDir))
                                .add(layout
                                        .createSequentialGroup()
                                        .add(layout
                                                .createParallelGroup(org.jdesktop.layout.GroupLayout.BASELINE)
                                                .add(jLabel1)
                                                .add(tfInputDir, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE,
                                                        org.jdesktop.layout.GroupLayout.DEFAULT_SIZE,
                                                        org.jdesktop.layout.GroupLayout.PREFERRED_SIZE))
                                        .addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
                                        .add(layout
                                                .createParallelGroup(org.jdesktop.layout.GroupLayout.BASELINE)
                                                .add(jLabel2)
                                                .add(tfOutputDir, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE,
                                                        org.jdesktop.layout.GroupLayout.DEFAULT_SIZE,
                                                        org.jdesktop.layout.GroupLayout.PREFERRED_SIZE))))
                        .addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
                        .add(jPanel1, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE,
                                org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE)
                        .addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
                        .add(progressBar, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE,
                                org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, org.jdesktop.layout.GroupLayout.PREFERRED_SIZE)
                        .addPreferredGap(org.jdesktop.layout.LayoutStyle.RELATED)
                        .add(layout.createParallelGroup(org.jdesktop.layout.GroupLayout.BASELINE).add(bRun).add(bQuit))
                        .addContainerGap(org.jdesktop.layout.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)));
        pack();
    }// </editor-fold>//GEN-END:initComponents

    /**
     * Handler for the "Browse" button next to the sox path field: opens a file chooser restricted to files and, if the user
     * approves a selection, writes its absolute path into the sox path text field. Cancelling leaves the field unchanged.
     *
     * @param evt
     *            the button event (not inspected)
     */
    private void browseSoxActionPerformed(java.awt.event.ActionEvent evt) {// GEN-FIRST:event_browseSoxActionPerformed
        JFileChooser fc = new JFileChooser();
        fc.setFileSelectionMode(JFileChooser.FILES_ONLY);
        int returnVal = fc.showOpenDialog(this);
        if (returnVal == JFileChooser.APPROVE_OPTION) {
            File file = fc.getSelectedFile();
            // System.out.println("Opening: " + file.getAbsolutePath());
            tfSoxPath.setText(file.getAbsolutePath());
        }
    }// GEN-LAST:event_browseSoxActionPerformed
/**
     * Handler for the "Quit" button: terminates the JVM with exit status 0.
     *
     * @param evt
     *            the button event (not inspected)
     */
    private void bQuitActionPerformed(java.awt.event.ActionEvent evt) {// GEN-FIRST:event_bQuitActionPerformed
        System.exit(0);
    }// GEN-LAST:event_bQuitActionPerformed

    /**
     * Handler for the "Browse" button of the output directory: lets the user pick a directory and stores its absolute path in
     * the output directory text field.
     *
     * @param evt
     *            the button event (not inspected)
     */
    private void browseOutputDirActionPerformed(java.awt.event.ActionEvent evt) {// GEN-FIRST:event_browseOutputDirActionPerformed
        chooseDirectoryInto(tfOutputDir);
    }// GEN-LAST:event_browseOutputDirActionPerformed

    /**
     * Handler for the "Browse" button of the input directory: lets the user pick a directory and stores its absolute path in
     * the input directory text field.
     *
     * @param evt
     *            the button event (not inspected)
     */
    private void browseInputDirActionPerformed(java.awt.event.ActionEvent evt) {// GEN-FIRST:event_browseInputDirActionPerformed
        chooseDirectoryInto(tfInputDir);
    }// GEN-LAST:event_browseInputDirActionPerformed

    /**
     * Shows a directory-only file chooser and, when the user approves a selection, writes the chosen directory's absolute path
     * into the given text field. A cancelled dialog leaves the field untouched.
     *
     * @param target
     *            the text field that receives the selected path
     */
    private void chooseDirectoryInto(javax.swing.JTextField target) {
        JFileChooser chooser = new JFileChooser();
        chooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
        if (chooser.showOpenDialog(this) == JFileChooser.APPROVE_OPTION) {
            target.setText(chooser.getSelectedFile().getAbsolutePath());
        }
    }

    /**
     * Shows a modal error message with a single OK button.
     * <p>
     * {@code showMessageDialog} is used on purpose: {@code showConfirmDialog} with {@code JOptionPane.OK_OPTION} as option type
     * renders Yes/No buttons, because {@code OK_OPTION} and {@code YES_NO_OPTION} share the value 0.
     *
     * @param message
     *            the text to display
     */
    private void showError(String message) {
        JOptionPane.showMessageDialog(this, message, "Info missing", JOptionPane.ERROR_MESSAGE);
    }

    /**
     * Parses the number at the start of a combo box entry such as {@code "1.0 (maximum)"}: everything up to the first
     * whitespace character is handed to {@link Double#parseDouble(String)}.
     *
     * @param text
     *            the entry text, must not be null
     * @return the parsed value, or {@code Double.NaN} if the leading token is not a number
     */
    private static double parseLeadingNumber(String text) {
        String trimmed = text.trim();
        int end = 0;
        while (end < trimmed.length() && !Character.isWhitespace(trimmed.charAt(end))) {
            end++;
        }
        try {
            return Double.parseDouble(trimmed.substring(0, end));
        } catch (NumberFormatException nfe) {
            return Double.NaN;
        }
    }

    /**
     * Handler for the "Run" button: validates the form, reports the first problem found in an error dialog, and otherwise
     * starts a {@link Converter} thread with the selected settings.
     * <p>
     * The sample rate is only parsed (and sox only required) when down-sampling is selected, and the target amplitude is only
     * parsed when global amplitude scaling is selected, so that unused, possibly hand-edited fields cannot block a run.
     *
     * @param evt
     *            the button event (not inspected)
     */
    private void runActionPerformed(java.awt.event.ActionEvent evt) {// GEN-FIRST:event_runActionPerformed
        String inDirPath = tfInputDir.getText().trim();
        if (inDirPath.length() == 0) {
            showError("Input directory not specified!");
            return;
        }
        String outDirPath = tfOutputDir.getText().trim();
        if (outDirPath.length() == 0) {
            showError("Output directory not specified!");
            return;
        }

        boolean bestOnly = cbBestOnly.isSelected();
        boolean stereoMono = cbStereoMono.isSelected();
        int whichChannel = AudioPlayer.LEFT_ONLY;
        if (rbRight.isSelected()) {
            whichChannel = AudioPlayer.RIGHT_ONLY;
        } else if (rbBoth.isSelected()) {
            whichChannel = AudioPlayer.STEREO;
        }

        boolean downSample = cbDownsample.isSelected();
        int targetSampleRate = 0; // only read by the converter when downSample is true
        String soxPath = tfSoxPath.getText().trim();
        if (downSample) {
            try {
                // The combo box is editable, so the selected item may be arbitrary user text.
                targetSampleRate = Integer.parseInt(String.valueOf(comboSampleRate.getSelectedItem()).trim());
            } catch (NumberFormatException nfe) {
                targetSampleRate = 0;
            }
            if (targetSampleRate <= 0) {
                showError("Target sample rate must be a positive whole number (in Hz).");
                return;
            }
            if (!new File(soxPath).exists()) {
                showError("Please indicate location of 'sox' tool\n" + "or deactivate sample rate conversion.");
                return;
            }
        }

        boolean highPassFilter = cbHighPassFilter.isSelected();
        boolean powerNormalise = cbPowerNormalise.isSelected();
        boolean maximiseAmplitude = cbGlobalAmplitude.isSelected();
        double targetMaxAmplitude = 1.0; // only read by the converter when maximiseAmplitude is true
        if (maximiseAmplitude) {
            targetMaxAmplitude = parseLeadingNumber(String.valueOf(comboMaxAmplitude.getSelectedItem()));
            // !(x > 0) also rejects NaN
            if (!(targetMaxAmplitude > 0) || Double.isInfinite(targetMaxAmplitude)) {
                showError("Target max. amplitude must be a positive number, e.g. 0.9");
                return;
            }
        }
        boolean trimSilences = cbTrimSilences.isSelected();

        // Global amplitude scaling counts as work, too.
        if (!(stereoMono || downSample || highPassFilter || powerNormalise || maximiseAmplitude || trimSilences)) {
            showError("Nothing to do!");
            return;
        }

        // OK, we have a valid request
        progressBar.setStringPainted(true);
        try {
            new Converter(inDirPath, outDirPath, bestOnly, stereoMono, whichChannel, downSample, targetSampleRate, soxPath,
                    highPassFilter, powerNormalise, maximiseAmplitude, targetMaxAmplitude, trimSilences).start();
        } catch (IOException ioe) {
            ioe.printStackTrace();
            progressBar.setStringPainted(false);
            JOptionPane.showMessageDialog(this, ioe.getMessage(), "Cannot start conversion", JOptionPane.ERROR_MESSAGE);
        }
    }// GEN-LAST:event_runActionPerformed

    /**
     * Opens the converter window on the AWT event dispatch thread.
     *
     * @param args
     *            the command line arguments (ignored)
     */
    public static void main(String args[]) {
        java.awt.EventQueue.invokeLater(new Runnable() {
            public void run() {
                new AudioConverterGUI().setVisible(true);
            }
        });
    }

    // Variables declaration - do not modify//GEN-BEGIN:variables
    private javax.swing.JButton bBrowseInputDir;
    private javax.swing.JButton bBrowseOutputDir;
    private javax.swing.JButton bBrowseSoxPath;
    private javax.swing.JButton bQuit;
    private javax.swing.JButton bRun;
    private javax.swing.ButtonGroup buttonGroup1;
    private javax.swing.JCheckBox cbBestOnly;
    private javax.swing.JCheckBox cbDownsample;
    private javax.swing.JCheckBox cbGlobalAmplitude;
    private javax.swing.JCheckBox cbHighPassFilter;
    private javax.swing.JCheckBox cbPowerNormalise;
    private javax.swing.JCheckBox cbStereoMono;
    private javax.swing.JCheckBox cbTrimSilences;
    private javax.swing.JComboBox comboMaxAmplitude;
    private javax.swing.JComboBox comboSampleRate;
    private javax.swing.JLabel jLabel1;
    private javax.swing.JLabel jLabel2;
    private javax.swing.JLabel jLabel3;
    private javax.swing.JLabel jLabel4;
    private javax.swing.JLabel jLabel5;
    private javax.swing.JLabel jLabel6;
    private javax.swing.JPanel jPanel1;
    private javax.swing.JPanel jPanel2;
    private javax.swing.JProgressBar progressBar;
    private javax.swing.JRadioButton rbBoth;
    private javax.swing.JRadioButton rbLeft;
    private javax.swing.JRadioButton rbRight;
    private javax.swing.JTextField tfInputDir;
    private javax.swing.JTextField tfOutputDir;
    private javax.swing.JTextField tfSoxPath;

    // End of variables declaration//GEN-END:variables

    /**
     * Background thread that converts every accepted wav file of the input directory into the output directory, applying the
     * processing steps selected at construction time and reporting progress on the enclosing window's progress bar.
     * <p>
     * All progress bar updates are posted to the AWT event dispatch thread, because Swing components must not be mutated from
     * a worker thread.
     */
    public class Converter extends Thread {
        private final File inDir;
        private final File outDir;
        private final FilenameFilter filenameFilter;
        private final boolean bestOnly;
        private final boolean stereoMono;
        private final int channel;
        private final boolean downSample;
        private final int targetSampleRate;
        private final String soxPath;
        private final boolean highPassFilter;
        private final boolean powerNormalise;
        private final boolean maximiseAmplitude;
        private final double targetMaxAmplitude;
        private final boolean trimSilences;

        /**
         * Creates a converter; call {@link #start()} to run it.
         *
         * @param inDirPath
         *            directory containing the wav files to convert; must exist
         * @param outDirPath
         *            directory receiving the converted files; created if missing; must differ from the input directory
         * @param bestOnly
         *            if true, only files whose basename ends in a digit are processed (a001.wav, but not a001a.wav)
         * @param stereoMono
         *            whether to apply stereo-to-mono conversion
         * @param channel
         *            channel selector handed to the stereo-to-mono converter (one of the AudioPlayer channel constants)
         * @param downSample
         *            whether to resample each output file with sox
         * @param targetSampleRate
         *            sample rate in Hz passed to sox; only used if downSample is true
         * @param soxPath
         *            path of the sox executable; only used if downSample is true
         * @param highPassFilter
         *            whether to high-pass filter mono audio
         * @param powerNormalise
         *            whether to equalise the energy of recording sessions
         * @param maximiseAmplitude
         *            whether to scale all files so that the loudest sample reaches targetMaxAmplitude
         * @param targetMaxAmplitude
         *            target for the global maximum amplitude; only used if maximiseAmplitude is true
         * @param trimSilences
         *            whether to trim initial and final silences of each output file
         * @throws IOException
         *             if the input directory is not a directory, the output directory cannot be created, or both denote the
         *             same directory
         */
        public Converter(String inDirPath, String outDirPath, final boolean bestOnly, boolean stereoMono, int channel,
                boolean downSample, int targetSampleRate, String soxPath, boolean highPassFilter, boolean powerNormalise,
                boolean maximiseAmplitude, double targetMaxAmplitude, boolean trimSilences) throws IOException {
            inDir = new File(inDirPath);
            if (!inDir.isDirectory()) {
                throw new IOException("Input directory does not exist or is not a directory: " + inDir.getAbsolutePath());
            }
            outDir = new File(outDirPath);
            if (!outDir.isDirectory() && !outDir.mkdirs()) {
                throw new IOException("Cannot create output directory: " + outDir.getAbsolutePath());
            }
            // run() deletes an existing output file before reading the input file of the same name,
            // so converting a directory onto itself would destroy the recordings.
            if (inDir.getCanonicalFile().equals(outDir.getCanonicalFile())) {
                throw new IOException("Input and output directory must be different: " + inDir.getAbsolutePath());
            }

            final String extension = ".wav";
            filenameFilter = new FilenameFilter() {
                public boolean accept(File dir, String filename) {
                    if (!filename.endsWith(extension)) {
                        return false;
                    }
                    if (!bestOnly) {
                        return true;
                    }
                    int lastBasenameIndex = filename.length() - extension.length() - 1;
                    if (lastBasenameIndex < 0) {
                        return false; // the file is named exactly ".wav": no basename to inspect
                    }
                    char lastCharInBasename = filename.charAt(lastBasenameIndex);
                    return lastCharInBasename >= '0' && lastCharInBasename <= '9';
                }
            };
            this.bestOnly = bestOnly;
            this.stereoMono = stereoMono;
            this.channel = channel;
            this.downSample = downSample;
            this.targetSampleRate = targetSampleRate;
            this.soxPath = soxPath;
            this.highPassFilter = highPassFilter;
            this.powerNormalise = powerNormalise;
            this.maximiseAmplitude = maximiseAmplitude;
            this.targetMaxAmplitude = targetMaxAmplitude;
            this.trimSilences = trimSilences;
        }

        /**
         * Posts a progress bar update to the event dispatch thread.
         *
         * @param percent
         *            the new progress bar value
         */
        private void setProgress(final int percent) {
            java.awt.EventQueue.invokeLater(new Runnable() {
                public void run() {
                    progressBar.setValue(percent);
                }
            });
        }

        /**
         * Runs the conversion: optionally computes per-session and global amplitude factors, then writes one output file per
         * input file (PCM signed, optional mono conversion, high-pass filter and amplitude scaling), and finally applies
         * silence trimming and down-sampling to each written file. Failures are reported on the console; the progress bar is
         * reset to 0 on failure and its text painting is switched off in all cases.
         */
        public void run() {
            int numSteps = 1;
            if (maximiseAmplitude)
                numSteps++;
            if (powerNormalise)
                numSteps++;
            int stepProgress = 100 / numSteps;
            int stepsComplete = 0;
            try {
                File[] wavFiles = inDir.listFiles(filenameFilter);
                if (wavFiles == null) {
                    // listFiles() yields null if the directory vanished or cannot be read
                    throw new IOException("Cannot list files in input directory " + inDir.getAbsolutePath());
                }
                System.out.println("Number of wave files to convert: " + wavFiles.length);
                if (wavFiles.length == 0) {
                    // Nothing to process; the analysis steps below would divide by zero / index an empty array.
                    System.out.println("Nothing to convert.");
                    return;
                }
                double[] amplitudeFactors = new double[wavFiles.length];
                Arrays.fill(amplitudeFactors, 1); // factor 1 = no change

                if (powerNormalise) {
                    computeSessionFactors(wavFiles, amplitudeFactors, stepsComplete * stepProgress, stepProgress);
                    stepsComplete++;
                }

                // Find global maximum amplitude
                if (maximiseAmplitude) {
                    applyGlobalScaling(wavFiles, amplitudeFactors, stepsComplete * stepProgress, stepProgress);
                    stepsComplete++;
                }

                for (int i = 0; i < wavFiles.length; i++) {
                    setProgress(stepsComplete * stepProgress + i * stepProgress / wavFiles.length);
                    String wavFileName = wavFiles[i].getName();
                    System.out.println(wavFileName);
                    File outFile = new File(outDir, wavFileName);
                    if (outFile.exists()) {
                        outFile.delete();
                    }
                    AudioInputStream ais = AudioSystem.getAudioInputStream(wavFiles[i]);
                    try {
                        // Enforce PCM_SIGNED encoding
                        if (!ais.getFormat().getEncoding().equals(AudioFormat.Encoding.PCM_SIGNED)) {
                            ais = AudioSystem.getAudioInputStream(AudioFormat.Encoding.PCM_SIGNED, ais);
                        }
                        if (stereoMono) {
                            ais = new AudioConverterUtils.Stereo2Mono(channel).apply(ais);
                        }
                        // If Audio is Mono then only remove Low Frequency Noise
                        if (highPassFilter && ais.getFormat().getChannels() == 1) {
                            ais = new AudioConverterUtils.HighPassFilter(50, 40).apply(ais);
                        }
                        if (powerNormalise || maximiseAmplitude) {
                            double factor = amplitudeFactors[i];
                            if (factor != 1.) {
                                // the normaliser is given the squared amplitude factor
                                ais = new EnergyNormaliser(factor * factor).apply(ais);
                            }
                        }
                        AudioSystem.write(ais, AudioFileFormat.Type.WAVE, outFile);
                    } finally {
                        // closes the outermost stream of the processing chain, also when a step fails
                        ais.close();
                    }
                    if (trimSilences) {
                        trimSilences(outFile);
                    }
                    if (downSample) {
                        samplingRateConverter(outFile.getAbsolutePath(), targetSampleRate);
                    }
                }
                setProgress(100);
                System.out.println("Completed Audio Conversion successfully... Done.");
            } catch (ArrayIndexOutOfBoundsException e) {
                e.printStackTrace();
                setProgress(0);
                System.err
                        .println("Audio conversion failed for ArrayIndexOutOfBoundsException. Probably this is due because the file lenght is not a multiple of 1024/2048 samples.");
            } catch (Exception e) {
                e.printStackTrace();
                setProgress(0);
                System.err.println("Audio conversion failed.");
            } finally {
                java.awt.EventQueue.invokeLater(new Runnable() {
                    public void run() {
                        progressBar.setStringPainted(false);
                    }
                });
            }
        }

        /**
         * Power normalisation step: sorts the files by modification time (in place), groups them into recording sessions
         * (a gap of more than 10 minutes between consecutive files starts a new session), measures each session's average
         * energy and stores, for every file of a quieter session, the amplitude factor that lifts its session to the energy of
         * the loudest session.
         *
         * @param wavFiles
         *            the files to analyse, non-empty; reordered by modification time
         * @param amplitudeFactors
         *            one entry per file, indexed like the sorted wavFiles; entries of quieter sessions are overwritten
         * @param progressBase
         *            progress bar value at the start of this step
         * @param stepProgress
         *            progress bar range covered by this step
         * @throws UnsupportedAudioFileException
         *             if a file cannot be read as audio
         * @throws IOException
         *             if a file cannot be read
         */
        private void computeSessionFactors(File[] wavFiles, double[] amplitudeFactors, int progressBase, int stepProgress)
                throws UnsupportedAudioFileException, IOException {
            Arrays.sort(wavFiles, new Comparator<File>() {
                public int compare(File f1, File f2) {
                    long t1 = f1.lastModified();
                    long t2 = f2.lastModified();
                    if (t1 < t2)
                        return -1;
                    if (t1 > t2)
                        return 1;
                    return 0;
                }
            });

            // each session pair is the index numbers delimiting the session in typical java fashion,
            // i.e. from the first element to one higher than the last element.
            // e.g., (3,6) includes wavFiles[3],wavFiles[4],wavFiles[5]
            List<Pair<Integer, Integer>> sessions = new ArrayList<Pair<Integer, Integer>>();
            int currentStart = 0;
            for (int i = 0; i < wavFiles.length - 1; i++) {
                long ti = wavFiles[i].lastModified();
                long ti1 = wavFiles[i + 1].lastModified();
                assert ti1 >= ti; // we sorted it, didn't we
                if (ti1 - ti > 600000) { // 600.000 ms = 10 min
                    System.out.println(); // empty console line at each session boundary
                    sessions.add(new Pair<Integer, Integer>(currentStart, i + 1));
                    currentStart = i + 1;
                }
            }
            sessions.add(new Pair<Integer, Integer>(currentStart, wavFiles.length));

            List<Double> sessionEnergies = new ArrayList<Double>();
            double maxEnergy = Double.NEGATIVE_INFINITY;
            for (int i = 0; i < sessions.size(); i++) {
                Pair<Integer, Integer> session = sessions.get(i);
                double avgEnergySession = computeAverageEnergy(wavFiles, session.getFirst(), session.getSecond(),
                        progressBase + session.getFirst() * stepProgress / wavFiles.length, progressBase
                                + session.getSecond() * stepProgress / wavFiles.length);
                System.out.printf(Locale.US, "Session at %tc: %d files, avg. Energy: %f\n",
                        wavFiles[session.getFirst()].lastModified(), session.getSecond() - session.getFirst(),
                        avgEnergySession);
                sessionEnergies.add(avgEnergySession);
                if (avgEnergySession > maxEnergy)
                    maxEnergy = avgEnergySession;
            }

            // Amplitude factors:
            // db1 = 10 * log10(A1^2), db2 = 10 * log10(A2^2)
            // => energy difference db2 - db1 corresponds to amplitude factor:
            // factor = sqrt(10^((db2 - db1)/10))
            for (int s = 0, max = sessions.size(); s < max; s++) {
                Pair<Integer, Integer> session = sessions.get(s);
                double energy = sessionEnergies.get(s);
                if (maxEnergy - energy < 1.e-15) { // energy == maxEnergy
                    continue;
                }
                double factor = Math.sqrt(Math.pow(10., (maxEnergy - energy) / 10.));
                System.out.println("Session " + s + " scaling factor: " + factor);
                for (int i = session.getFirst(); i < session.getSecond(); i++) {
                    amplitudeFactors[i] = factor;
                }
            }
        }

        /**
         * Global amplitude step: finds the largest absolute sample value over all files (after applying each file's current
         * amplitude factor) and multiplies every factor so that this maximum maps to the target maximum amplitude. If no file
         * contains a non-zero sample, the factors are left unchanged.
         *
         * @param wavFiles
         *            the files to analyse, non-empty
         * @param amplitudeFactors
         *            one entry per file, indexed like wavFiles; scaled in place
         * @param progressBase
         *            progress bar value at the start of this step
         * @param stepProgress
         *            progress bar range covered by this step
         * @throws UnsupportedAudioFileException
         *             if a file cannot be read as audio
         * @throws IOException
         *             if a file cannot be read
         */
        private void applyGlobalScaling(File[] wavFiles, double[] amplitudeFactors, int progressBase, int stepProgress)
                throws UnsupportedAudioFileException, IOException {
            double globalMaxAmplitude = 0.;
            int globalMaxIndex = -1;
            for (int i = 0; i < wavFiles.length; i++) {
                setProgress(progressBase + i * stepProgress / wavFiles.length);
                double maxAmplitude = getMaxAbsAmplitude(wavFiles[i]) * amplitudeFactors[i];
                if (maxAmplitude > globalMaxAmplitude) {
                    globalMaxAmplitude = maxAmplitude;
                    globalMaxIndex = i;
                }
            }
            if (globalMaxIndex < 0) {
                // Every sample of every file is zero: there is no loudest file and the scaling factor would be infinite.
                System.out.println("All files are silent, skipping global amplitude scaling");
                return;
            }
            System.out.println("Maximum amplitude of " + globalMaxAmplitude + (powerNormalise ? " (after normalisation)" : "")
                    + " found in file " + wavFiles[globalMaxIndex].getName());
            System.out.println("Target maximum amplitude: " + targetMaxAmplitude);
            double scalingFactor = targetMaxAmplitude / globalMaxAmplitude;
            System.out.println("Applying scaling factor of " + scalingFactor + " to all files");
            for (int i = 0; i < wavFiles.length; i++) {
                amplitudeFactors[i] *= scalingFactor;
            }
        }

        /**
         * To choose a global value to get Best Dynamic Bits: determines, over all files, the largest number of bits by which
         * samples would have to be shifted to fit the target sample size.
         *
         * @param wavFiles
         *            the files to analyse
         * @param targetBitsPerSample
         *            the intended sample size in bits
         * @param progressMax
         *            progress bar value to reach at the end of this step
         * @return globalBestShift
         * @throws Exception
         *             if a file cannot be read or processed
         * @deprecated using volume scaling integrated with energy normalisation instead.
         */
        @Deprecated
        // using volume scaling integrated with energy normalisation instead.
        private int bestShiftBits(File[] wavFiles, int targetBitsPerSample, int progressMax) throws Exception {
            int globalBestShift = 0;
            int interProgress = progressBar.getValue();
            int stepProgress = progressMax - interProgress;
            for (int i = 0; i < wavFiles.length; i++) {
                setProgress(interProgress + i * stepProgress / wavFiles.length);
                int maxBitPos = 0;
                AudioInputStream ais = AudioSystem.getAudioInputStream(wavFiles[i]);
                try {
                    if (!ais.getFormat().getEncoding().equals(AudioFormat.Encoding.PCM_SIGNED)) {
                        ais = AudioSystem.getAudioInputStream(AudioFormat.Encoding.PCM_SIGNED, ais);
                    }
                    if (stereoMono) {
                        ais = new AudioConverterUtils.Stereo2Mono(channel).apply(ais);
                    }
                    // If Audio is Mono then only remove Low Frequency Noise
                    if (highPassFilter && ais.getFormat().getChannels() == 1) {
                        ais = new AudioConverterUtils.HighPassFilter(50, 40).apply(ais);
                    }

                    int[] samples = AudioConverterUtils.getSamples(ais);
                    int valueAfterShift;
                    int bitsPerSample = ais.getFormat().getSampleSizeInBits();
                    for (int k = 0; k < samples.length; k++) {
                        // highest shift j for which the sample magnitude still has a non-zero remainder
                        for (int j = bitsPerSample; j >= 1; j--) {
                            valueAfterShift = Math.abs(samples[k]) >> j;
                            if (valueAfterShift != 0) {
                                if (maxBitPos < j)
                                    maxBitPos = j;
                                break;
                            }
                        }
                    }
                } finally {
                    ais.close();
                }
                int bestShift = maxBitPos - targetBitsPerSample + 2;
                if (bestShift > globalBestShift) {
                    globalBestShift = bestShift;
                }
            }
            return globalBestShift;
        }

        /**
         * Reserves a uniquely named, empty ".wav" file in the same directory as the given file, so that it can later replace
         * that file by a rename within one directory.
         *
         * @param target
         *            the file that the temporary file is meant to replace
         * @param prefix
         *            name prefix of the temporary file, at least three characters
         * @return the newly created empty file
         * @throws IOException
         *             if the file cannot be created
         */
        private File createTempWavBeside(File target, String prefix) throws IOException {
            return File.createTempFile(prefix, ".wav", target.getAbsoluteFile().getParentFile());
        }

        /**
         * Replaces target by the content of tempFile: tries a rename first and falls back to copying when the rename is
         * refused (e.g. because the target exists on a platform that does not rename over existing files).
         *
         * @param tempFile
         *            the file holding the new content
         * @param target
         *            the file to replace
         * @throws IOException
         *             if tempFile is empty, i.e. the producing step wrote no audio, or if copying fails
         */
        private void replaceFile(File tempFile, File target) throws IOException {
            if (tempFile.length() == 0) {
                throw new IOException("No audio data was produced for " + target.getAbsolutePath());
            }
            if (!tempFile.renameTo(target)) {
                FileUtils.copy(tempFile.getAbsolutePath(), target.getAbsolutePath());
            }
        }

        /**
         * Sampling Rate Conversion doing with SOX: runs {@code sox <waveFile> -r <rate> <tempFile>} and replaces the wave
         * file with the result.
         * <p>
         * The command is passed as an argument list, so paths containing spaces work. sox output is forwarded to
         * {@code System.err} and a non-zero exit status aborts with an exception instead of silently keeping or overwriting
         * the file.
         *
         * @param waveFile
         *            path of the wav file to resample in place
         * @param targetSamplingRate
         *            the new sampling rate in Hz
         * @throws IOException
         *             if sox cannot be started, fails, is interrupted, or its result cannot be stored
         */
        private void samplingRateConverter(String waveFile, int targetSamplingRate) throws IOException {
            File target = new File(waveFile);
            File tempFile = createTempWavBeside(target, "sox");
            try {
                ProcessBuilder builder = new ProcessBuilder(soxPath, waveFile, "-r", String.valueOf(targetSamplingRate),
                        tempFile.getAbsolutePath());
                builder.redirectErrorStream(true);
                Process process = builder.start();

                // Drain the merged stdout/stderr, otherwise sox may block once the pipe buffer is full.
                java.io.InputStream processOutput = process.getInputStream();
                try {
                    byte[] buffer = new byte[4096];
                    int n;
                    while ((n = processOutput.read(buffer)) != -1) {
                        System.err.write(buffer, 0, n);
                    }
                } finally {
                    processOutput.close();
                }

                int exitStatus;
                try {
                    exitStatus = process.waitFor();
                } catch (InterruptedException e) {
                    process.destroy();
                    Thread.currentThread().interrupt(); // keep the interrupt visible to the caller
                    IOException ioe = new IOException("Interrupted while waiting for sox to resample " + waveFile);
                    ioe.initCause(e);
                    throw ioe;
                }
                if (exitStatus != 0) {
                    throw new IOException("sox exited with status " + exitStatus + " while resampling " + waveFile);
                }
                replaceFile(tempFile, target);
            } finally {
                // still present after a failure or after the copy fallback; gone after a successful rename
                if (tempFile.exists()) {
                    tempFile.delete();
                }
            }
        }

        /**
         * Computes the energy of one session as the median of the per-file average energies of
         * {@code wavFiles[pos] .. wavFiles[end - 1]}, advancing the progress bar linearly from progressFrom towards
         * progressTo.
         *
         * @param wavFiles
         *            all files
         * @param pos
         *            index of the first file of the session
         * @param end
         *            index one past the last file of the session; must be greater than pos
         * @param progressFrom
         *            progress bar value for the first file
         * @param progressTo
         *            progress bar value reached after the last file
         * @return the median of the files' average energies
         * @throws UnsupportedAudioFileException
         *             if a file cannot be read as audio
         * @throws IOException
         *             if a file cannot be read
         */
        private double computeAverageEnergy(File[] wavFiles, int pos, int end, int progressFrom, int progressTo)
                throws UnsupportedAudioFileException, IOException {
            int len = end - pos;
            double[] energies = new double[len];
            for (int i = 0; i < len; i++) {
                setProgress(progressFrom + i * (progressTo - progressFrom) / len);
                energies[i] = computeAverageEnergy(wavFiles[pos + i]);
            }
            return MathUtils.median(energies);
        }

        /**
         * Measures the typical energy of the voiced speech in one file: frames of 10 ms are analysed with an
         * {@code EnergyAnalyser_dB}, speech stretches are detected from the energy history, and among the frames inside those
         * stretches only the ones with an f0 estimate above 10 are kept. The result is the median energy of those frames
         * divided by the frame length in samples.
         *
         * @param wavFile
         *            the file to analyse
         * @return the median voiced-speech frame energy per sample, or 0 (with a message on System.err) if no voiced speech
         *         frame was found
         * @throws UnsupportedAudioFileException
         *             if the file cannot be read as audio
         * @throws IOException
         *             if the file cannot be read
         */
        private double computeAverageEnergy(File wavFile) throws UnsupportedAudioFileException, IOException {
            int sampleRate;
            double[] audioData;
            AudioInputStream ais = AudioSystem.getAudioInputStream(wavFile);
            try {
                sampleRate = (int) ais.getFormat().getSampleRate();
                audioData = new AudioDoubleDataSource(ais).getAllData();
            } finally {
                // all samples are in memory now; release the file handle
                ais.close();
            }
            int framelength = (int) (0.01 /* seconds */* sampleRate);
            DoubleDataSource audio = new BufferedDoubleDataSource(audioData);
            EnergyAnalyser energyAnalyser = new EnergyAnalyser_dB(audio, framelength, sampleRate);

            PitchFileHeader f0TrackerParams = new PitchFileHeader();
            f0TrackerParams.windowSizeInSeconds = energyAnalyser.getFrameLengthTime();
            f0TrackerParams.skipSizeInSeconds = energyAnalyser.getFrameShiftTime();
            f0TrackerParams.fs = sampleRate;
            F0TrackerAutocorrelationHeuristic f0Tracker = new F0TrackerAutocorrelationHeuristic(f0TrackerParams);
            // a fresh data source over the same samples, since the first one was handed to the energy analyser
            audio = new BufferedDoubleDataSource(audioData);
            f0Tracker.pitchAnalyze(audio);
            double[] f0Contour = f0Tracker.getF0Contour();

            // Now determine speech stretches, and compute average frame energy within those
            int energyBufferLength = 20;
            double speechStartLikelihood = 0.1; // count as speech if at least 10% of recent frames are above threshold
            double speechEndLikelihood = 0.1; // count as non-speech if up to 10% of recent frames are below threshold
            double shiftFromMinimumEnergyCenter = 0.1; // threshold is at 10% of the distance between smallest and biggest
                                                       // cluster
            int numClusters = 4;
            double[][] speechStretches = energyAnalyser.getSpeechStretchesUsingEnergyHistory(energyBufferLength,
                    speechStartLikelihood, speechEndLikelihood, shiftFromMinimumEnergyCenter, numClusters);
            FrameBasedAnalyser.FrameAnalysisResult<Double>[] energies = energyAnalyser.analyseAllFrames();

            int numSpeechFrames = 0;
            int numFrames = Math.min(energies.length, f0Contour.length);
            // an upper bound for the number of speech frames is numFrames:
            double[] speechFrameEnergies = new double[numFrames];
            int iCurrentFrame = 0;
            for (int i = 0; i < speechStretches.length; i++) {
                // skip frames that start before this stretch
                while (iCurrentFrame < numFrames && energies[iCurrentFrame].getStartTime() < speechStretches[i][0]) {
                    iCurrentFrame++;
                }
                while (iCurrentFrame < numFrames && energies[iCurrentFrame].getStartTime() < speechStretches[i][1]) {
                    // Current frame is speech frame
                    // If it is also voiced, use its energy:
                    if (f0Contour[iCurrentFrame] > 10) { // voiced
                        speechFrameEnergies[numSpeechFrames] = energies[iCurrentFrame].get();
                        numSpeechFrames++;
                    }
                    iCurrentFrame++;
                }
            }
            if (numSpeechFrames == 0) {
                System.err.println("No speech found in file " + wavFile.getAbsolutePath());
                return 0;
            }
            double medianSpeechFrameEnergy = MathUtils.median(speechFrameEnergies, 0, numSpeechFrames);
            double medianSpeechSampleEnergy = medianSpeechFrameEnergy / energyAnalyser.getFrameLengthSamples();
            return medianSpeechSampleEnergy;
        }

        /**
         * Returns the largest absolute sample value found in the given file.
         *
         * @param wavFile
         *            the file to scan
         * @return the maximum of the absolute sample values, 0 if the file contains no samples
         * @throws UnsupportedAudioFileException
         *             if the file cannot be read as audio
         * @throws IOException
         *             if the file cannot be read
         */
        private double getMaxAbsAmplitude(File wavFile) throws UnsupportedAudioFileException, IOException {
            double maxAmplitude = 0.;
            AudioInputStream ais = AudioSystem.getAudioInputStream(wavFile);
            try {
                double[] audioData = new AudioDoubleDataSource(ais).getAllData();
                for (int i = 0; i < audioData.length; i++) {
                    double magnitude = Math.abs(audioData[i]);
                    if (magnitude > maxAmplitude) {
                        maxAmplitude = magnitude;
                    }
                }
            } finally {
                ais.close();
            }
            return maxAmplitude;
        }

        /**
         * Removes initial and final silences from the given file in place: the trimmed audio is written to a temporary file
         * next to it, which then replaces the original.
         *
         * @param wavFile
         *            the file to trim
         * @throws UnsupportedAudioFileException
         *             if the file cannot be read as audio
         * @throws IOException
         *             if reading or writing fails, or if endpoint removal wrote no audio
         */
        private void trimSilences(File wavFile) throws UnsupportedAudioFileException, IOException {
            // We hard-code the values here. Use marytts.tools.voiceimport.EndpointDetector if you want to tune them.
            int energyBufferLength = 20;
            double speechStartLikelihood = 0.1;
            double speechEndLikelihood = 0.1;
            double shiftFromMinimumEnergyCenter = 0.0;
            int numClusters = 4;
            double minimumStartSilenceInSeconds = 0.5;
            double minimumEndSilenceInSeconds = 0.5;

            File tmpFile = createTempWavBeside(wavFile, "trim");
            try {
                AudioConverterUtils.removeEndpoints(wavFile.getAbsolutePath(), tmpFile.getAbsolutePath(), energyBufferLength,
                        speechStartLikelihood, speechEndLikelihood, shiftFromMinimumEnergyCenter, numClusters,
                        minimumStartSilenceInSeconds, minimumEndSilenceInSeconds);
                replaceFile(tmpFile, wavFile);
            } finally {
                // still present after a failure or after the copy fallback; gone after a successful rename
                if (tmpFile.exists()) {
                    tmpFile.delete();
                }
            }
        }
    }
}