/** * Copyright 2000-2009 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.signalproc.process; import java.util.Arrays; import marytts.signalproc.window.DynamicTwoHalvesWindow; import marytts.signalproc.window.Window; import marytts.util.data.BufferedDoubleDataSource; import marytts.util.data.DoubleDataSource; import marytts.util.data.SequenceDoubleDataSource; /** * A class to merge two audio signals, using pitch-synchronous frames. * * @author marc * */ public class FramewiseMerger extends FrameOverlapAddSource { protected DoubleDataSource labelTimes; protected DoubleDataSource otherLabelTimes; protected FrameProvider otherFrameProvider; protected double prevLabel, currentLabel, prevOtherLabel, currentOtherLabel; protected double localTimeStretchFactor = 1; /** * Create a new merger, creating audio by pitch-synchronous merging of audio frames from a source (aka the "signal") and a * target (aka the "other"), linearly mapping the corresponding times between the two sources. * * @param inputSource * the audio data for the signal * @param pitchmarks * the pitchmarks for the signal * @param samplingRate * the sampling rate for the signal * @param labelTimes * optionally, the label times for the signal, needed for time alignment between the signal and the other * @param otherSource * the audio data for the other * @param otherPitchmarks * the pitchmarks for the other * @param otherSamplingRate * the sampling rate for the other * @param otherLabelTimes * optionally, the label times for the other; if both are present, the time interval between the i-th and the * (i+1)-th label time is linearly stretched/squeezed in order to find the mapping frame for interpolation * @param merger * the signal processing method used for merging the properties of the "other" into the corresponding frame in the * "signal". */ public FramewiseMerger(DoubleDataSource inputSource, DoubleDataSource pitchmarks, int samplingRate, DoubleDataSource labelTimes, DoubleDataSource otherSource, DoubleDataSource otherPitchmarks, int otherSamplingRate, DoubleDataSource otherLabelTimes, InlineFrameMerger merger) { // Set up label times for time stretching: this.labelTimes = labelTimes; this.otherLabelTimes = otherLabelTimes; // set all current and previous labels to 0: prevLabel = 0; currentLabel = 0; prevOtherLabel = 0; currentOtherLabel = 0; InlineDataProcessor analysisWindow = new DynamicTwoHalvesWindow(Window.HANNING, 0.5); // Overlap-add a properly windowed first period by hand: // Read out the first pitchmark: double firstPitchmark = pitchmarks.getData(1)[0]; assert firstPitchmark > 0; // If the first pitchmark is too close (closer than 1ms) to origin, skip it: if (firstPitchmark < 0.001 * samplingRate) firstPitchmark = pitchmarks.getData(1)[0]; pitchmarks = new SequenceDoubleDataSource(new DoubleDataSource[] { new BufferedDoubleDataSource(new double[] { firstPitchmark }), pitchmarks }); int firstPeriodLength = (int) (firstPitchmark * samplingRate); double[] firstPeriod = new double[firstPeriodLength]; inputSource.getData(firstPeriod, 0, firstPeriodLength); inputSource = new SequenceDoubleDataSource(new DoubleDataSource[] { new BufferedDoubleDataSource(firstPeriod), inputSource }); this.memory = new double[2 * firstPeriodLength]; System.arraycopy(firstPeriod, 0, memory, firstPeriodLength, firstPeriodLength); analysisWindow.applyInline(memory, 0, memory.length); if (merger != null) { // Read out the first pitchmark: double firstOtherPitchmark = otherPitchmarks.getData(1)[0]; assert firstOtherPitchmark > 0; // If the first other pitchmark is too close (closer than 1ms) to origin, skip it: if (firstOtherPitchmark < 0.001 * otherSamplingRate) firstPitchmark = otherPitchmarks.getData(1)[0]; otherPitchmarks = new SequenceDoubleDataSource(new DoubleDataSource[] { new BufferedDoubleDataSource(new double[] { firstOtherPitchmark }), otherPitchmarks }); int firstOtherPeriodLength = (int) (firstOtherPitchmark * otherSamplingRate); double[] firstOtherPeriod = new double[firstOtherPeriodLength]; otherSource.getData(firstOtherPeriod, 0, firstOtherPeriodLength); otherSource = new SequenceDoubleDataSource(new DoubleDataSource[] { new BufferedDoubleDataSource(firstOtherPeriod), otherSource }); double[] frameToMerge = new double[2 * firstOtherPeriodLength]; System.arraycopy(firstOtherPeriod, 0, frameToMerge, firstOtherPeriodLength, firstOtherPeriodLength); merger.setFrameToMerge(frameToMerge); merger.applyInline(memory, 0, memory.length); } // Shift the data left in memory: System.arraycopy(memory, firstPeriodLength, memory, 0, firstPeriodLength); Arrays.fill(memory, firstPeriodLength, memory.length, 0); // And initialise frame providers for normal operation: this.frameProvider = new PitchFrameProvider(inputSource, pitchmarks, analysisWindow, samplingRate, 8, 1); this.otherFrameProvider = new PitchFrameProvider(otherSource, otherPitchmarks, analysisWindow, otherSamplingRate, 8, 1); this.processor = merger; } /** * Create a new merger, creating audio by merging of audio frames at a fixed frame rate, from a source (aka the "signal") and * a target (aka the "other"), linearly mapping the corresponding times between the two sources. * * @param inputSource * the audio data for the signal * @param frameLength * length of the fixed-length frames * @param samplingRate * the sampling rate for the signal * @param labelTimes * optionally, the label times for the signal, needed for time alignment between the signal and the other * @param otherSource * the audio data for the other * @param otherSamplingRate * the sampling rate for the other * @param otherLabelTimes * optionally, the label times for the other; if both are present, the time interval between the i-th and the * (i+1)-th label time is linearly stretched/squeezed in order to find the mapping frame for interpolation * @param merger * the signal processing method used for merging the properties of the "other" into the corresponding frame in the * "signal". */ public FramewiseMerger(DoubleDataSource inputSource, int frameLength, int samplingRate, DoubleDataSource labelTimes, DoubleDataSource otherSource, int otherSamplingRate, DoubleDataSource otherLabelTimes, InlineFrameMerger merger) { DoubleDataSource paddingOther1 = new BufferedDoubleDataSource(new double[3 * frameLength / 4]); DoubleDataSource paddedOtherSource = new SequenceDoubleDataSource(new DoubleDataSource[] { paddingOther1, otherSource }); this.otherFrameProvider = new FrameProvider(paddedOtherSource, Window.get(Window.HANNING, frameLength, 0.5), frameLength, frameLength / 4, samplingRate, true); this.blockSize = frameLength / 4; int inputFrameshift = blockSize; Window window = Window.get(Window.HANNING, frameLength + 1, 0.5); this.outputWindow = null; this.memory = new double[frameLength]; // This is used when the last input frame has already been read, // to do the last frame output properly: this.processor = merger; // We need to feed through (and discard) 3 (if overlapFraction == 3/4) // blocks of zeroes, so that the first three blocks are properly rebuilt. DoubleDataSource padding1 = new BufferedDoubleDataSource(new double[3 * inputFrameshift]); DoubleDataSource padding2 = new BufferedDoubleDataSource(new double[3 * inputFrameshift]); DoubleDataSource paddedSource = new SequenceDoubleDataSource(new DoubleDataSource[] { padding1, inputSource, padding2 }); this.frameProvider = new FrameProvider(paddedSource, window, frameLength, inputFrameshift, samplingRate, true); double[] dummy = new double[blockSize]; for (int i = 0; i < 3; i++) { // System.err.println("Discarding "+blockSize+" samples:"); getData(dummy, 0, blockSize); // this calls getNextFrame() indirectly } this.frameProvider.resetInternalTimer(); this.otherFrameProvider.resetInternalTimer(); // Only now, after initialising the overlap-add, set up the label times. // Set up label times for time stretching: this.labelTimes = labelTimes; this.otherLabelTimes = otherLabelTimes; // set all current and previous labels to 0: prevLabel = 0; currentLabel = 0; prevOtherLabel = 0; currentOtherLabel = 0; } /** * Get the next frame of input data. This method is called by prepareBlock() when preparing the output data to be read. This * implementation reads the data from the frameProvider. In addition, the appropriate "other" frame is identified; this is the * frame closest in starting time to the starting time of the next "signal" frame (i.e., the starting time of the return value * of this method), correcting for the label times. Concretely, if the start time t_s of the next signal frame is between * labelTimes[i] and labelTimes[i+1], then the optimal other frame starting time to would be: t_o = otherLabelTimes[i] + (t_s * - labelTimes[i])/(labelTimes[i+1]-labelTimes[i]) * (otherLabelTimes[i+1] - otherLabelTimes[i]) The other frame whose * starting time is closest to to will be prepared as for merging. * * @return the next signal frame. */ protected double[] getNextFrame() { double[] nextSignalFrame = frameProvider.getNextFrame(); double frameStart = frameProvider.getFrameStartTime(); // System.out.println("Getting signal frame, start time = "+frameStart); while (frameStart >= currentLabel) { // move to next label if (labelTimes == null || otherLabelTimes == null || !labelTimes.hasMoreData() || !otherLabelTimes.hasMoreData()) { currentLabel = Double.POSITIVE_INFINITY; localTimeStretchFactor = 1; } else { prevLabel = currentLabel; currentLabel = labelTimes.getData(1)[0]; assert currentLabel >= prevLabel; prevOtherLabel = currentOtherLabel; currentOtherLabel = otherLabelTimes.getData(1)[0]; assert currentOtherLabel >= prevOtherLabel; // System.out.println("current label: "+currentLabel+"("+prevLabel+")"); // System.out.println("other label: "+currentOtherLabel+"("+prevOtherLabel+")"); if (currentLabel == prevLabel || currentOtherLabel == prevOtherLabel) { localTimeStretchFactor = 1; } else { localTimeStretchFactor = (currentOtherLabel - prevOtherLabel) / (currentLabel - prevLabel); } } } assert prevLabel <= frameStart && frameStart < currentLabel; // System.out.println("Local time stretch = "+localTimeStretchFactor); double targetOtherStart = prevOtherLabel + (frameStart - prevLabel) * localTimeStretchFactor; // System.out.println("Target other start = "+targetOtherStart); double otherStart = otherFrameProvider.getFrameStartTime(); double[] otherFrame = otherFrameProvider.getCurrentFrame(); double prevOtherStart = -1; double[] prevOtherFrame = null; // System.out.println("Current other frame starts at "+otherStart); if (otherStart < 0) { // no other frame yet otherFrame = otherFrameProvider.getNextFrame(); otherStart = otherFrameProvider.getFrameStartTime(); // System.out.println("Getting first other frame -- starts at "+otherStart); } assert otherStart >= 0; // Now skip other frames until the current otherStart is closer to targetOtherStart // then the next one would be. double expectedNextOtherStart = otherStart + otherFrameProvider.getFrameShiftTime(); // while (Math.abs(expectedNextOtherStart-targetOtherStart)<Math.abs(otherStart-targetOtherStart) while (otherStart < targetOtherStart && otherFrameProvider.hasMoreData()) { prevOtherFrame = (double[]) otherFrame.clone(); prevOtherStart = otherStart; otherFrame = otherFrameProvider.getNextFrame(); otherStart = otherFrameProvider.getFrameStartTime(); // System.out.println("Skipping frame -- new one starts at "+otherStart); assert Math.abs(otherStart - expectedNextOtherStart) < 1e-10 : "Other frame starts at " + otherStart + " -- expected was " + expectedNextOtherStart; expectedNextOtherStart = otherStart + otherFrameProvider.getFrameShiftTime(); } if (prevOtherFrame == null) { ((InlineFrameMerger) processor).setFrameToMerge(otherFrame); } else { assert prevOtherStart < targetOtherStart; assert targetOtherStart <= otherStart || !otherFrameProvider.hasMoreData(); if (targetOtherStart > otherStart) targetOtherStart = otherStart; // Request interpolation between prevOtherFrame and otherFrame in relation to their distance to targetOtherStart // Linear interpolation: double rPrev = 1 - (targetOtherStart - prevOtherStart) / (otherStart - prevOtherStart); assert 0 <= rPrev; assert rPrev < 1; // PrintfFormat f = new PrintfFormat("%.3f"); // System.out.println("Prev: "+f.sprintf(prevOtherStart)+" Target: "+f.sprintf(targetOtherStart)+" Other: "+f.sprintf(otherStart)+" rPrev: "+f.sprintf(rPrev)); ((InlineFrameMerger) processor).setFrameToMerge(prevOtherFrame, otherFrame, rPrev); } return nextSignalFrame; } /** * Output blocksize -- here, this is the same as the input frame shift. */ protected int getBlockSize() { return frameProvider.getFrameShiftSamples(); } /** * @param args * args */ public static void main(String[] args) { // TODO Auto-generated method stub } }