package edu.stanford.nlp.sequences; import edu.stanford.nlp.math.ArrayMath; import java.util.Arrays; /** * This class will sample an output from a sequence model. It * assumes that the scores are (unnormalized) log-probabilities. It works by sampling * each variable in order, conditioned on the previous variables. * * @author Jenny Finkel */ public class SequenceSampler implements BestSequenceFinder { /** * A class for testing. */ private static class TestSequenceModel implements SequenceModel { private int[] correctTags = {0, 0, 1, 2, 3, 4, 5, 6, 7, 6, 5, 4, 3, 2, 1, 0, 0}; private int[] allTags = {1, 2, 3, 4, 5, 6, 7, 8, 9}; private int[] midTags = {0, 1, 2, 3}; private int[] nullTags = {0}; public int length() { return correctTags.length - leftWindow() - rightWindow(); } public int leftWindow() { return 2; } public int rightWindow() { return 0; } public int[] getPossibleValues(int pos) { if (pos < leftWindow() || pos >= leftWindow() + length()) { return nullTags; } if (correctTags[pos] < 4) { return midTags; } return allTags; } public double scoreOf(int[] tags, int pos) { return 1.0; } public double scoreOf(int[] sequence) { throw new UnsupportedOperationException(); } public double[] scoresOf(int[] tags, int pos) { int[] tagsAtPos = getPossibleValues(pos); double[] scores = new double[tagsAtPos.length]; Arrays.fill(scores, 1.0); return scores; } } // end class TestSequenceModel private static String arrayToString(int[] x) { StringBuilder sb = new StringBuilder("("); for (int j = 0; j < x.length; j++) { sb.append(x[j]); if (j != x.length - 1) { sb.append(", "); } } sb.append(")"); return sb.toString(); } public static void main(String[] args) { SequenceSampler ti = new SequenceSampler(); SequenceModel ts = new TestSequenceModel(); int[] bestTags = ti.bestSequence(ts); System.out.println("The best sequence is ... " + arrayToString(bestTags)); } /** * Runs the Viterbi algorithm on the sequence model given by the TagScorer * in order to find the best sequence. * @param ts * @return an array containing the int tags of the best sequence */ public int[] bestSequence(SequenceModel ts) { int[] sample = new int[ts.length()+ts.leftWindow()]; for (int pos = ts.leftWindow(); pos < sample.length; pos++) { double[] scores = ts.scoresOf(sample, pos); double total = 0.0; for (int i = 0; i < scores.length; i++) { scores[i] = Math.exp(scores[i]); } ArrayMath.normalize(scores); int l = ArrayMath.sampleFromDistribution(scores); sample[pos] = ts.getPossibleValues(pos)[l]; } return sample; } }