/**
* Copyright 2011 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.util.data;
import marytts.signalproc.analysis.Label;
/**
* @author marc
*
*/
public class AlignLabelsUtils {
public static int[][] alignLabels(Label[] labs1, Label[] labs2, double PDeletion, double PInsertion, double PSubstitution) {
double PCorrect = 1.0 - (PDeletion + PInsertion + PSubstitution);
int n = labs1.length;
int m = labs2.length;
double D;
int[][] labelMap = null;
if (n == 0 || m == 0) {
D = m;
return labelMap;
}
int i, j;
double[][] d = new double[n + 1][m + 1];
for (i = 0; i < d.length; i++) {
for (j = 0; j < d[i].length; j++)
d[i][j] = 0.0;
}
int[][] p = new int[n + 1][m + 1];
for (i = 0; i < p.length; i++) {
for (j = 0; j < p[i].length; j++)
p[i][j] = 0;
}
double z = 1;
d[0][0] = z;
for (i = 1; i <= n; i++)
d[i][0] = d[i - 1][0] * PDeletion;
for (j = 1; j <= m; j++)
d[0][j] = d[0][j - 1] * PInsertion;
String strEvents = "DISC";
double c;
double tmp;
for (i = 1; i <= n; i++) {
for (j = 1; j <= m; j++) {
if (labs1[i - 1].phn.compareTo(labs2[j - 1].phn) == 0)
c = PCorrect;
else
c = PSubstitution;
int ind = 1;
d[i][j] = d[i - 1][j] * PDeletion;
tmp = d[i][j - 1] * PInsertion;
if (tmp > d[i][j]) {
d[i][j] = tmp;
ind = 2;
}
tmp = d[i - 1][j - 1] * c;
if (tmp > d[i][j]) {
d[i][j] = tmp;
ind = 3;
}
if (ind == 3 && labs1[i - 1].phn.compareTo(labs2[j - 1].phn) == 0)
ind = 4;
// Events 1:Deletion, 2:Insertion, 3:Substitution, 4:Correct
p[i][j] = ind;
}
}
// Backtracking
D = d[n][m];
int k = 1;
int[] E = new int[m * n];
E[k - 1] = p[n][m];
i = n + 1;
j = m + 1;
int t = m;
while (true) {
if (E[k - 1] == 3 || E[k - 1] == 4) {
i = i - 1;
j = j - 1;
} else if (E[k - 1] == 2)
j = j - 1;
else if (E[k - 1] == 1)
i = i - 1;
if (p[i - 1][j - 1] == 0) {
while (j > 1) {
k = k + 1;
j = j - 1;
E[k - 1] = 2;
}
break;
} else {
k = k + 1;
E[k - 1] = p[i - 1][j - 1];
}
t = t - 1;
}
// Reverse the order
int[] Events = new int[k];
for (t = k; t >= 1; t--)
Events[t - 1] = E[k - t];
int[][] tmpLabelMap = new int[n * m][2];
int ind = 0;
int ind1 = 0;
int ind2 = 0;
for (t = 1; t <= k; t++) {
if (Events[t - 1] == 3 || Events[t - 1] == 4) // Substitution or correct
{
tmpLabelMap[ind][0] = ind1;
tmpLabelMap[ind][1] = ind2;
ind1++;
ind2++;
ind++;
} else if (Events[t - 1] == 1) // An item in seq1 is deleted in seq2
{
ind1++;
} else if (Events[t - 1] == 2) // An item is inserted in seq2
{
ind2++;
}
}
if (ind > 0) {
labelMap = new int[ind][2];
for (i = 0; i < labelMap.length; i++) {
labelMap[i][0] = tmpLabelMap[i][0];
labelMap[i][1] = tmpLabelMap[i][1];
}
}
return labelMap;
}
// This version assumes that there can only be insertions and deletions but no substitutions
// (i.e. text based alignment with possible differences in pauses only)
public static int[][] alignLabels(Label[] seq1, Label[] seq2) {
return alignLabels(seq1, seq2, 0.05, 0.05, 0.05);
}
}