/**
* Copyright 2004-2006 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.analysis;
import java.util.ArrayList;
import java.util.List;
import marytts.signalproc.window.Window;
import marytts.util.data.DoubleDataSource;
/**
*
* @author Marc Schröder
*
* A common basis for F0 tracking algorithms. The following main steps are assumed: 1. preprocessing of the signal 2.
* estimation of candidates for F0 3. selection of a path through the candidates 4. post-processing of the F0 contour
*
*/
public abstract class F0Tracker {
public static final int DEFAULT_MINF0 = 70;
public static final int DEFAULT_MAXF0 = 700;
protected TransitionCost transitionCost;
public F0Tracker() {
this.transitionCost = getTransitionCost();
}
public F0Contour analyse(DoubleDataSource signal, int samplingRate) {
DoubleDataSource preprocessedSignal = preprocess(signal);
FrameBasedAnalyser candidateEstimator = getCandidateEstimator(preprocessedSignal, samplingRate);
// Go through the frames, collect the candidates, and then find the best path through the
// candidates
F0Contour f0 = new F0Contour(transitionCost, candidateEstimator.getFrameShiftTime());
FrameBasedAnalyser.FrameAnalysisResult oneResult;
while ((oneResult = candidateEstimator.analyseNextFrame()) != null) {
f0.addFrameAnalysis((F0Candidate[]) oneResult.get());
}
f0.findPath();
return f0;
}
protected abstract DoubleDataSource preprocess(DoubleDataSource signal);
protected abstract FrameBasedAnalyser getCandidateEstimator(DoubleDataSource preprocessedSignal, int samplingRate);
protected abstract TransitionCost getTransitionCost();
public class F0Candidate {
protected double frequency;
protected double score;
/**
* Create a default F0 candidate, representing the option "unvoiced", score 0.
*/
protected F0Candidate() {
frequency = Double.NaN;
score = 0;
}
protected F0Candidate(double frequency, double score) {
this.frequency = frequency;
this.score = score;
}
public boolean betterThan(F0Candidate other) {
return this.score > other.score;
}
}
public abstract class TransitionCost {
protected TransitionCost() {
}
protected abstract double getCost(F0Candidate a, F0Candidate b);
}
public abstract class CandidateEstimator extends FrameBasedAnalyser {
protected int nCandidates;
public CandidateEstimator(DoubleDataSource signal, Window window, int frameShift, int samplingRate, int nCandidates) {
super(signal, window, frameShift, samplingRate);
this.nCandidates = nCandidates;
}
/**
* Apply this FrameBasedAnalyser to the given data.
*
* @param frame
* the data to analyse, which must be of the length prescribed by this FrameBasedAnalyser, i.e. by
* {@link #getFrameLengthSamples()}.
* @return an array of F0Candidates
* @throws IllegalArgumentException
* if frame does not have the prescribed length
*/
public Object analyse(double[] frame) {
if (frame.length != getFrameLengthSamples())
throw new IllegalArgumentException("Expected frame of length " + getFrameLengthSamples() + ", got "
+ frame.length);
F0Candidate[] candidates = new F0Candidate[nCandidates];
candidates[0] = new F0Candidate(); // default = unvoiced
findCandidates(candidates, frame);
normaliseCandidatesScores(candidates);
return candidates;
}
protected abstract void findCandidates(F0Candidate[] candidates, double[] frame);
protected void normaliseCandidatesScores(F0Candidate[] candidates) {
assert candidates != null;
int iBest = 0;
for (int i = 0; i < candidates.length; i++) {
if (candidates[i] == null)
break;
else if (candidates[i].betterThan(candidates[iBest]))
iBest = i;
}
double bestScore = candidates[iBest].score;
if (bestScore == 0)
return;
// Normalise scores relative to best score:
for (int i = 0; i < candidates.length; i++) {
if (candidates[i] == null)
break;
candidates[i].score /= bestScore;
}
}
protected void addCandidate(F0Candidate[] candidates, F0Candidate newCandidate) {
// If there is still space left in candidates, simply add the new candidate;
// else, replace the weakest candidate with the new one if the new one is better.
int iWorst = 0;
for (int i = 0; i < candidates.length; i++) {
if (candidates[i] == null) {
candidates[i] = newCandidate;
return;
} else if (candidates[iWorst].betterThan(candidates[i])) {
iWorst = i;
}
}
if (newCandidate.betterThan(candidates[iWorst])) {
candidates[iWorst] = newCandidate;
}
}
}
public class F0Contour {
protected List candidateLattice;
protected double[] contour;
protected TransitionCost transitionCost;
protected double frameShiftTime;
protected F0Contour(TransitionCost transitionCost, double frameShiftTime) {
candidateLattice = new ArrayList();
contour = null;
this.transitionCost = transitionCost;
this.frameShiftTime = frameShiftTime;
}
public F0Contour(String ptcFile) {
}
protected void addFrameAnalysis(F0Candidate[] candidates) {
candidateLattice.add(candidates);
}
protected void findPath() {
assert candidateLattice != null;
assert contour == null;
contour = new double[candidateLattice.size()];
for (int i = 0; i < contour.length; i++) {
contour[i] = getBest(i).frequency;
}
}
protected F0Candidate getBest(int index) {
F0Candidate[] candidates = (F0Candidate[]) candidateLattice.get(index);
assert candidates.length >= 1;
int iBest = 0;
for (int i = 0; i < candidates.length; i++) {
if (candidates[i] == null)
break;
if (candidates[i].betterThan(candidates[iBest]))
iBest = i;
}
return candidates[iBest];
}
public double[] getContour() {
return contour;
}
public double getFrameShiftTime() {
return frameShiftTime;
}
}
}