/**
* Copyright 2006 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.process;
import java.io.File;
import java.io.FileReader;
import java.util.Arrays;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import marytts.signalproc.window.DynamicTwoHalvesWindow;
import marytts.util.data.DoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.data.text.ESTTextfileDoubleDataSource;
/**
* @author Marc Schröder
*
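* Cut frames out of a given signal, and provide them one by one, optionally applying a processor to each frame. This
* implementation provides frames of varying length, delimited by a series of markers (e.g., pitchmarks). Each frame spans a
* configurable number of periods and is shifted by a configurable number of periods, so consecutive frames overlap whenever
* the number of periods per frame exceeds the number of periods shifted.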
*
*/
public class PitchFrameProvider extends FrameProvider {
protected DoubleDataSource pitchmarks;
protected int[] periodLengths;
protected int shiftPeriods;
protected int periodsInMemory;
protected long currPitchmark;
protected DynamicTwoHalvesWindow twoHalvesWindow;
protected double[] cutFrame;
/**
* Create a new PitchFrameProvider providing one period at a time.
*
* @param signal
* audio signal
* @param pitchmarks
* an array of pitchmarks; each pitch mark is in seconds from signal start
* @param processor
* an optional processor to apply to each input frame (e.g., a DynamicWindow)
* @param samplingRate
* number of samples per second in signal
*/
public PitchFrameProvider(DoubleDataSource signal, DoubleDataSource pitchmarks, InlineDataProcessor processor,
int samplingRate) {
this(signal, pitchmarks, processor, samplingRate, 1, 1);
}
/**
* Create a new PitchFrameProvider with a configurable number of pitch periods per frame and pitch periods to shift by.
*
* @param signal
* audio signal
* @param pitchmarks
* an array of pitchmarks; each pitch mark is in seconds from signal start
* @param processor
* an optional processor to apply to each input frame (e.g., a DynamicWindow)
* @param samplingRate
* number of samples per second in signal
* @param framePeriods
* number of periods that each frame should contain
* @param shiftPeriods
* number of periods that frames should be shifted by
*/
public PitchFrameProvider(DoubleDataSource signal, DoubleDataSource pitchmarks, InlineDataProcessor processor,
int samplingRate, int framePeriods, int shiftPeriods) {
super(signal, null, 0, 0, samplingRate, true);
this.pitchmarks = pitchmarks;
this.periodLengths = new int[framePeriods];
this.shiftPeriods = shiftPeriods;
this.periodsInMemory = 0;
this.currPitchmark = 0;
// Need to treat an asymmetric window differently, because we need to know
// the pitchmark position in the "middle" of the window.
if (processor instanceof DynamicTwoHalvesWindow)
twoHalvesWindow = (DynamicTwoHalvesWindow) processor;
else
this.processor = processor;
}
/**
* Read data from input signal into current frame. This implementation will attempt to read up to the next pitch mark, filling
* the frame from the position given in nPrefilled onwards and extending the size of frame if necessary. Note that this
* implementation will perform zero-padding of periods at the beginning and end of the signal: when the first shiftPeriods
* periods are read, (framePeriods-shiftPeriods) empty periods (zero signal), equal in length to the first period, will be
* added to the left; after the end of the signal, (framePeriods-shiftPeriods) empty periods (zero signal), equal in length to
* the last period, will be added to the right.
*
* @param nPrefilled
* number of valid values at the beginning of frame. These should not be lost or overwritten.
* @return the number of new values read into frame at position nPrefilled. 0 signals that no further data can be read.
*/
protected int getData(int nPrefilled) {
// When we get here, we assume that either there is more input data or there is some still in memory.
assert hasMoreData();
// write into frame at position nPrefilled.
int nPeriodsToGet;
if (nPrefilled == 0) { // first time, read full frame
nPeriodsToGet = periodLengths.length;
periodsInMemory = 0;
} else { // next times, just the new ones
nPeriodsToGet = shiftPeriods;
// remember period lengths:
System.arraycopy(periodLengths, shiftPeriods, periodLengths, 0, periodLengths.length - shiftPeriods);
// For post-padding empty periods: keep track of how many real periods we have;
// this is in addition to nPrefilled, which counts data for real and padded periods.
periodsInMemory -= shiftPeriods;
}
int pos = nPrefilled;
for (int i = periodLengths.length - nPeriodsToGet; i < periodLengths.length; i++) {
// We read up to the end of signal and pitchmarks, whichever is shorter:
if (signal.hasMoreData() && pitchmarks.hasMoreData()) {
long prevPitchmark = currPitchmark;
double pitchmarkInSeconds = pitchmarks.getData(1)[0];
currPitchmark = (long) Math.round(pitchmarkInSeconds * samplingRate);
periodLengths[i] = (int) (currPitchmark - prevPitchmark);
// Plausibility check: Do not allow periods longer than 30 ms (33 Hz) or shorter than 1 ms (1000 Hz)!
assert periodLengths[i] < samplingRate / 33 : "Found pitch period longer than 30 ms (less than 33 Hz)";
assert periodLengths[i] > samplingRate / 1000 : "Found pitch period shorter than 1 ms (more than 1000 Hz)";
if (pos + periodLengths[i] > frame.length) {
// need to increase frame size
double[] oldFrame = frame;
frame = new double[pos + periodLengths[i]];
if (pos > 0)
System.arraycopy(oldFrame, 0, frame, 0, pos);
}
int read = signal.getData(frame, pos, periodLengths[i]);
assert read == periodLengths[i] : "expected " + periodLengths[i] + ", got " + read;
pos += read;
periodsInMemory++;
} else { // no more input data
// Stop condition: only output if there is at least one valid period in frame
if (periodsInMemory <= 0)
return 0;
// fill up periods as long as the last periods, but with zero data
assert i > 0;
periodLengths[i] = periodLengths[i - 1];
if (pos + periodLengths[i] > frame.length) {
// need to increase frame size
double[] oldFrame = frame;
frame = new double[pos + periodLengths[i]];
if (pos > 0)
System.arraycopy(oldFrame, 0, frame, 0, pos);
}
Arrays.fill(frame, pos, pos + periodLengths[i], 0);
pos += periodLengths[i];
}
}
frameShift = 0;
for (int i = 0; i < shiftPeriods; i++)
frameShift += periodLengths[i];
frameLength = 0;
for (int i = 0; i < periodLengths.length; i++)
frameLength += periodLengths[i];
return pos - nPrefilled;
}
/**
* Provide the next frame of data.
*
* @return the next frame on success, null on failure.
*/
public double[] getNextFrame() {
double[] uncutFrame = super.getNextFrame();
if (uncutFrame == null) {
cutFrame = null;
return null;
}
int frameLength = super.getFrameLengthSamples();
cutFrame = new double[frameLength];
System.arraycopy(uncutFrame, 0, cutFrame, 0, frameLength);
if (twoHalvesWindow != null) {
// using two half windows only makes sense if we have an even number of
// pitch periods in frame
assert periodLengths.length % 2 == 0 : "Using two half windows makes sense only for an even number of periods per frame";
int middle = 0;
for (int i = 0; i < periodLengths.length / 2; i++) {
middle += periodLengths[i];
}
assert middle < frameLength : "Middle " + middle + " larger than framelength " + frameLength + "!";
twoHalvesWindow.applyInlineLeftHalf(cutFrame, 0, middle);
twoHalvesWindow.applyInlineRightHalf(cutFrame, middle, frameLength - middle);
}
return cutFrame;
}
public double[] getCurrentFrame() {
return cutFrame;
}
/**
* The number of periods provided in one frame.
*
* @return length of periodLengths
*/
public int getFramePeriods() {
return periodLengths.length;
}
/**
* The number of periods by which the analysis window is shifted.
*
* @return shiftPeriods
*/
public int getShiftPeriods() {
return shiftPeriods;
}
/**
* Whether or not this frameprovider can provide another frame.
*/
public boolean hasMoreData() {
return signal.hasMoreData() && pitchmarks.hasMoreData() || periodsInMemory - shiftPeriods > 0;
}
/**
* Test this pitch frame provider, by printing information about the pitch frames of an audio file.
*
* @param args
* two args are expected: the name of an audio file, and the name of the corresponding pitch mark file.
* @throws Exception
* Exception
*/
public static void main(String[] args) throws Exception {
File audioFile = new File(args[0]);
File pitchmarkFile = new File(args[1]);
AudioInputStream ais = AudioSystem.getAudioInputStream(audioFile);
int samplingRate = (int) ais.getFormat().getSampleRate();
DoubleDataSource signal = new AudioDoubleDataSource(ais);
DoubleDataSource pitchmarks = new ESTTextfileDoubleDataSource(new FileReader(pitchmarkFile));
PitchFrameProvider pfp = new PitchFrameProvider(signal, pitchmarks, null, samplingRate);
double[] frame = null;
int n = 0;
int avgF0 = 0;
while ((frame = pfp.getNextFrame()) != null) {
int periodLength = pfp.validSamplesInFrame();
if (periodLength > 0) {
int f0 = samplingRate / periodLength;
double frameStartTime = pfp.getFrameStartTime();
double frameEndTime = frameStartTime + pfp.getFrameLengthTime();
avgF0 += f0;
n++;
System.err.println("Frame " + frameStartTime + " - " + frameEndTime + " s: " + periodLength + " samples, " + f0
+ " Hz");
} else {
System.err.println("Read empty frame");
}
}
avgF0 /= n;
System.err.println("Average F0: " + avgF0 + " Hz");
}
}