/*
	Copyright (c) 2009-2011 Speech Group at Informatik 5,
	Univ. Erlangen-Nuremberg, GERMANY

	Korbinian Riedhammer
	Tobias Bocklet

	This file is part of the Java Speech Toolkit (JSTK).

	The JSTK is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	The JSTK is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with the JSTK. If not, see <http://www.gnu.org/licenses/>.
*/
package de.fau.cs.jstk.stat;

import java.util.Iterator;
import java.util.List;

import org.apache.log4j.Logger;

import de.fau.cs.jstk.util.Arithmetics;

/**
 * Implementation of sequential training algorithms for (mixture) densities.
 * These are: EM, MAP, ML.
 * 
 * @author bocklet,sikoried
 */
public abstract class Trainer {
	public static Logger logger = Logger.getLogger(Trainer.class);

	/**
	 * Standard (single-core) maximum likelihood estimation of a single
	 * Gaussian density.
	 * 
	 * @param data list of feature vectors
	 * @param diagonalCovariances estimate a diagonal covariance instead of a full one
	 * @return ML estimate of the density
	 */
	public static Density ml1(List<double []> data, boolean diagonalCovariances) {
		int fd = data.get(0).length;
		double scale = 1. / data.size();

		Density d = (diagonalCovariances ? new DensityDiagonal(fd) : new DensityFull(fd));

		// accumulate scaled first and second order statistics
		for (double [] s : data) {
			for (int i = 0; i < fd; ++i)
				d.mue[i] += s[i] * scale;

			if (diagonalCovariances) {
				for (int i = 0; i < fd; ++i)
					d.cov[i] += s[i] * s[i] * scale;
			} else {
				// lower triangle of the outer product, packed row by row;
				// accumulate it exactly once per sample
				int l = 0;
				for (int j = 0; j < fd; ++j)
					for (int k = 0; k <= j; ++k)
						d.cov[l++] += s[j] * s[k] * scale;
			}
		}

		// normalize: cov = E[xx^T] - mue mue^T
		if (diagonalCovariances) {
			for (int i = 0; i < fd; ++i)
				d.cov[i] -= (d.mue[i] * d.mue[i]);
		} else {
			int k = 0;
			for (int i = 0; i < fd; ++i)
				for (int j = 0; j <= i; ++j)
					d.cov[k++] -= (d.mue[i] * d.mue[j]);
		}

		// update internal stats
		d.update();

		return d;
	}

	/**
	 * Standard (single-core) maximum likelihood estimation of a single
	 * Gaussian density.
	 * 
	 * @param data list of Samples
	 * @param diagonalCovariances estimate a diagonal covariance instead of a full one
	 * @return ML estimate of the density
	 */
	public static Density ml(List<Sample> data, boolean diagonalCovariances) {
		int fd = data.get(0).x.length;
		double scale = 1. / data.size();

		Density d = (diagonalCovariances ? new DensityDiagonal(fd) : new DensityFull(fd));

		// accumulate scaled first and second order statistics
		for (Sample s : data) {
			for (int i = 0; i < fd; ++i)
				d.mue[i] += s.x[i] * scale;

			if (diagonalCovariances) {
				for (int i = 0; i < fd; ++i)
					d.cov[i] += s.x[i] * s.x[i] * scale;
			} else {
				// lower triangle of the outer product, packed row by row;
				// accumulate it exactly once per sample
				int l = 0;
				for (int j = 0; j < fd; ++j)
					for (int k = 0; k <= j; ++k)
						d.cov[l++] += s.x[j] * s.x[k] * scale;
			}
		}

		// normalize: cov = E[xx^T] - mue mue^T
		if (diagonalCovariances) {
			for (int i = 0; i < fd; ++i)
				d.cov[i] -= (d.mue[i] * d.mue[i]);
		} else {
			int k = 0;
			for (int i = 0; i < fd; ++i)
				for (int j = 0; j <= i; ++j)
					d.cov[k++] -= (d.mue[i] * d.mue[j]);
		}

		// update internal stats
		d.update();

		return d;
	}
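	/* A minimal usage sketch for the ML estimators above (not part of the
	 * original API): estimate a diagonal Gaussian from a few hypothetical
	 * 2-dimensional feature vectors. Variable names and values are made up
	 * for illustration only.
	 *
	 *   List<double []> data = new java.util.ArrayList<double []>();
	 *   data.add(new double [] { 1.0, 2.0 });
	 *   data.add(new double [] { 1.5, 1.8 });
	 *   data.add(new double [] { 0.9, 2.2 });
	 *   Density d = Trainer.ml1(data, true);  // true: diagonal covariance
	 */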
	/**
	 * Perform a number of EM iterations (single-core, cached posteriors)
	 * using the initial density and the given data.
	 * 
	 * @param initial initial mixture estimate
	 * @param data list of Samples
	 * @param iterations number of EM iterations
	 * @return EM estimate of the mixture
	 */
	public static Mixture em(Mixture initial, List<Sample> data, int iterations) {
		Mixture estimate = initial;
		while (iterations-- > 0)
			estimate = em(estimate, data);
		return estimate;
	}

	/**
	 * Given an initial mixture density, perform a single EM iteration
	 * without parallelization. <br />
	 * Note that this version makes use of the following reformulation: <br />
	 * K = (sum_i gamma_i (x_i - mue)(x_i - mue)^T) / (sum_i gamma_i) <br />
	 *   = ((sum_i gamma_i x_i x_i^T) / (sum_i gamma_i)) - mue mue^T
	 * 
	 * @param initial initial mixture estimate
	 * @param data list of Samples
	 * @return EM estimate of the mixture
	 */
	public static Mixture em(Mixture initial, List<Sample> data) {
		boolean dc = initial.diagonal();

		int nd = initial.nd;
		int fd = initial.fd;

		// will hold the posteriors
		double [] p = new double [nd];

		// accumulated posteriors
		double asum = 0.;
		double [] sp = new double [nd];

		// first, accumulate the statistics
		logger.info("Trainer.em(): computing statistics...");

		Mixture em_estim2 = new Mixture(fd, nd, dc);
		Density [] d2 = em_estim2.components;

		Iterator<Sample> xiter = data.iterator();
		while (xiter.hasNext()) {
			double [] x = xiter.next().x;

			// evaluate and get posteriors
			initial.evaluate(x);
			initial.posteriors(p);

			// for all densities
			for (int j = 0; j < nd; ++j) {
				// priors
				sp[j] += p[j];
				asum += p[j];

				double [] mue = d2[j].mue;
				double [] cov = d2[j].cov;
				double px;

				// diagonal? cov and mue in one run!
				if (dc) {
					for (int k = 0; k < fd; ++k) {
						px = p[j] * x[k];
						mue[k] += px;
						cov[k] += px * x[k];
					}
				} else {
					int m = 0;
					for (int k = 0; k < fd; ++k) {
						px = p[j] * x[k];
						mue[k] += px;
						for (int l = 0; l <= k; ++l)
							cov[m++] += px * x[l];
					}
				}
			}
		}

		// normalize priors, means and covariances
		logger.info("Trainer.em(): normalizing...");
		for (int i = 0; i < nd; ++i) {
			d2[i].apr = sp[i] / asum;

			double [] mue = d2[i].mue;
			double [] cov = d2[i].cov;

			Arithmetics.sdiv2(mue, sp[i]);
			Arithmetics.sdiv2(cov, sp[i]);

			// conclude covariance computation: cov = E[xx^T] - mue mue^T
			if (dc) {
				for (int j = 0; j < fd; ++j)
					cov[j] -= mue[j] * mue[j];
			} else {
				int l = 0;
				for (int j = 0; j < fd; ++j)
					for (int k = 0; k <= j; ++k)
						cov[l++] -= mue[j] * mue[k];
			}
		}

		// update the internals
		for (Density dens : d2)
			dens.update();

		return em_estim2;
	}
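	/* A minimal usage sketch for the EM estimators above (not part of the
	 * original API), assuming an already initialized Mixture named initial
	 * (e.g. from a k-means based initialization) and a List<Sample> named
	 * data; both names are hypothetical.
	 *
	 *   Mixture estimate = Trainer.em(initial, data, 10);  // 10 iterations
	 *   estimate = Trainer.em(estimate, data);             // one more step
	 */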
	/**
	 * Perform a number of MAP iterations, based on the initial estimate.
	 * 
	 * @param initial initial mixture estimate
	 * @param data list of Samples
	 * @param r relevance factor
	 * @param iterations number of MAP iterations
	 * @param update String indicating which parameters to update: p for
	 *        prior, m for mean, c for covariance; if null, "pmc" (i.e. all
	 *        parameters) is assumed
	 * @return MAP estimate of the mixture
	 */
	public static Mixture map(Mixture initial, List<Sample> data, double r, int iterations, String update) {
		Mixture estimate = initial;
		while (iterations-- > 0)
			estimate = map(estimate, data, r, update);
		return estimate;
	}

	/**
	 * Perform a single MAP iteration on the given initial estimate.
	 * 
	 * @param initial initial mixture estimate
	 * @param data list of Samples
	 * @param r relevance factor
	 * @param update String indicating which parameters to update: p for
	 *        prior, m for mean, c for covariance; if null, "pmc" (i.e. all
	 *        parameters) is assumed
	 * @return MAP estimate of the mixture
	 */
	public static Mixture map(Mixture initial, List<Sample> data, double r, String update) {
		boolean updatePriors = true;
		boolean updateMeans = true;
		boolean updateCovariance = true;

		if (update != null) {
			update = update.toLowerCase();
			if (update.indexOf("p") < 0)
				updatePriors = false;
			if (update.indexOf("m") < 0)
				updateMeans = false;
			if (update.indexOf("c") < 0)
				updateCovariance = false;
		}

		if (!(updatePriors || updateMeans || updateCovariance)) {
			logger.info("Trainer.map(): no parameter update selected, returning initial estimate");
			return initial;
		}

		int n = data.size();
		int nd = initial.nd;
		int fd = initial.fd;
		boolean diagonal = initial.diagonal();

		// accumulated and per-sample posteriors
		double [] sp = new double [nd];
		double [] p = new double [nd];

		// new container
		Mixture adapted = new Mixture(fd, nd, diagonal);

		Density [] d1 = initial.components;
		Density [] d2 = adapted.components;

		// accumulate the statistics
		Iterator<Sample> xiter = data.iterator();
		for (int i = 0; i < n; ++i) {
			double [] x = xiter.next().x;

			initial.evaluate(x);
			initial.posteriors(p);

			for (int j = 0; j < nd; ++j) {
				// priors
				sp[j] += p[j];

				double [] mue = d2[j].mue;
				double [] cov = d2[j].cov;

				// means
				for (int k = 0; k < fd; ++k)
					mue[k] += p[j] * x[k];

				// covariances
				if (diagonal) {
					for (int k = 0; k < fd; ++k)
						cov[k] += p[j] * x[k] * x[k];
				} else {
					int m = 0;
					for (int k = 0; k < fd; ++k)
						for (int l = 0; l <= k; ++l)
							cov[m++] += p[j] * x[k] * x[l];
				}
			}
		}

		// normalize the statistics
		for (int i = 0; i < nd; ++i) {
			Arithmetics.sdiv2(d2[i].mue, sp[i]);
			Arithmetics.sdiv2(d2[i].cov, sp[i]);
		}

		// now combine the two estimates using the relevance factor
		double priorSum = 0.;
		for (int i = 0; i < nd; ++i) {
			double alpha = sp[i] / (r + sp[i]);

			// update prior
			if (updatePriors) {
				d2[i].apr = ((alpha * sp[i]) / (double) n) + ((1. - alpha) * d1[i].apr);
				priorSum += d2[i].apr;
			} else
				d2[i].apr = d1[i].apr;

			// update mean
			if (updateMeans) {
				for (int j = 0; j < fd; ++j)
					d2[i].mue[j] = (alpha * d2[i].mue[j]) + ((1. - alpha) * d1[i].mue[j]);
			} else
				System.arraycopy(d1[i].mue, 0, d2[i].mue, 0, fd);

			// update covariance matrix
			if (updateCovariance) {
				if (diagonal) {
					for (int j = 0; j < fd; ++j)
						d2[i].cov[j] = (alpha * d2[i].cov[j])
								+ (1. - alpha) * (d1[i].cov[j] + d1[i].mue[j] * d1[i].mue[j])
								- (d2[i].mue[j] * d2[i].mue[j]);
				} else {
					// m indexes the packed lower triangle across all rows,
					// so it must run continuously over the whole matrix
					int m = 0;
					for (int j = 0; j < fd; ++j) {
						for (int k = 0; k <= j; ++k) {
							d2[i].cov[m] = (alpha * d2[i].cov[m])
									+ (1. - alpha) * (d1[i].cov[m] + d1[i].mue[j] * d1[i].mue[k])
									- (d2[i].mue[j] * d2[i].mue[k]);
							m++;
						}
					}
				}
			} else
				System.arraycopy(d1[i].cov, 0, d2[i].cov, 0, d1[i].cov.length);
		}

		// normalize priors
		if (updatePriors) {
			for (int i = 0; i < nd; ++i)
				d2[i].apr /= priorSum;
		}

		// update the internals
		for (Density d : adapted.components)
			d.update();

		return adapted;
	}
}
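/* A minimal usage sketch for the MAP adaptation above (not part of the
 * original API): adapt a background mixture to target data over 5 iterations
 * with relevance factor 16, updating means only ("m"). The variable names
 * ubm and targetData are hypothetical.
 *
 *   Mixture adapted = Trainer.map(ubm, targetData, 16., 5, "m");
 */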