TestSentence.java example

Explorer
CoreNLP-master
// MaxentTagger -- StanfordMaxEnt, A Maximum Entropy Toolkit
// Copyright (c) 2002-2016 Leland Stanford Junior University

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 2A
// Stanford CA 94305-9020
// USA
// Support/Questions: stanford-nlp on SO or java-nlp-user@lists.stanford.edu
// Licensing: java-nlp-support@lists.stanford.edu
// http://nlp.stanford.edu/software/tagger.html

package edu.stanford.nlp.tagger.maxent;

import edu.stanford.nlp.io.EncodingPrintWriter;
import edu.stanford.nlp.io.PrintFile;
import edu.stanford.nlp.ling.*;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.math.SloppyMath;
import edu.stanford.nlp.sequences.BestSequenceFinder;
import edu.stanford.nlp.sequences.ExactBestSequenceFinder;
import edu.stanford.nlp.sequences.SequenceModel;
import edu.stanford.nlp.tagger.common.Tagger;
import edu.stanford.nlp.util.*;
import edu.stanford.nlp.util.logging.Redwood;

import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.*;
import java.text.NumberFormat;
import java.text.DecimalFormat;


/**
 * @author Kristina Toutanova
 * @author Michel Galley
 * @version 1.0
 */
public class TestSentence implements SequenceModel  {

  /** A logger for this class */
  private static Redwood.RedwoodChannels log = Redwood.channels(TestSentence.class);

  protected final boolean VERBOSE;
  protected static final String naTag = "NA";
  private static final String[] naTagArr = { naTag };
  protected static final boolean DBG = false;
  protected static final int kBestSize = 1;

  protected final String tagSeparator;
  protected final String encoding;
  protected final PairsHolder pairs = new PairsHolder();
  protected List<String> sent;
  private List<String> originalTags;
  // origWords is only set when run with a list of HasWords; when run
  // with a list of strings, this will be null
  protected List<HasWord> origWords;
  protected int size; // TODO this always has the value of sent.size(). Remove it? [cdm 2008]
  // protected double[][][] probabilities;
  private String[] correctTags;
  protected String[] finalTags;
  int numRight;
  int numWrong;
  int numUnknown;
  int numWrongUnknown;
  private int endSizePairs; // = 0;

  private volatile History history;
  private volatile Map<String,double[]> localScores = Generics.newHashMap();
  private volatile double[][] localContextScores;

  protected final MaxentTagger maxentTagger;

  public TestSentence(MaxentTagger maxentTagger) {
    assert(maxentTagger != null);
    assert(maxentTagger.getLambdaSolve() != null);
    this.maxentTagger = maxentTagger;
    if (maxentTagger.config != null) {
      tagSeparator = maxentTagger.config.getTagSeparator();
      encoding = maxentTagger.config.getEncoding();
      VERBOSE = maxentTagger.config.getVerbose();
    } else {
      tagSeparator = TaggerConfig.getDefaultTagSeparator();
      encoding = "utf-8";
      VERBOSE = false;
    }
    history = new History(pairs, maxentTagger.extractors);
  }

  public void setCorrectTags(List<? extends HasTag> sentence) {
    int len = sentence.size();
    correctTags = new String[len];
    for (int i = 0; i < len; i++) {
      correctTags[i] = sentence.get(i).tag();
    }
  }

  /**
   * Tags the sentence s by running maxent model.  Returns a sentence (List) of
   * TaggedWord objects.
   *
   * @param s Input sentence (List).  This isn't changed.
   * @return Tagged sentence
   */
  public ArrayList<TaggedWord> tagSentence(List<? extends HasWord> s,
                                           boolean reuseTags) {
    this.origWords = new ArrayList<>(s);
    int sz = s.size();
    this.sent = new ArrayList<>(sz + 1);
    for (HasWord value1 : s) {
      if (maxentTagger.wordFunction != null) {
        sent.add(maxentTagger.wordFunction.apply(value1.word()));
      } else {
        sent.add(value1.word());
      }
    }
    sent.add(Tagger.EOS_WORD);
    if (reuseTags) {
      this.originalTags = new ArrayList<>(sz + 1);
      for (HasWord value : s) {
        if (value instanceof HasTag) {
          originalTags.add(((HasTag) value).tag());
        } else {
          originalTags.add(null);
        }
      }
      originalTags.add(Tagger.EOS_TAG);
    }
    size = sz + 1;
    if (VERBOSE) {
      log.info("Sentence is " + SentenceUtils.listToString(sent, false, tagSeparator));
    }
    init();
    ArrayList<TaggedWord> result = testTagInference();
    if (maxentTagger.wordFunction != null) {
      for (int j = 0; j < sz; ++j) {
        result.get(j).setWord(s.get(j).word());
      }
    }
    return result;
  }


  protected void revert(int prevSize) {
    endSizePairs = prevSize;
  }

  protected void init() {
    //the eos are assumed already there
    localContextScores = new double[size][];
    for (int i = 0; i < size - 1; i++) {
      if (maxentTagger.dict.isUnknown(sent.get(i))) {
        numUnknown++;
      }
    }
  }

  /**
   * Returns a string representation of the sentence.
   * @return tagged sentence
   */
  String getTaggedNice() {
    StringBuilder sb = new StringBuilder();
    // size - 1 means to exclude the EOS (end of string) symbol
    for (int i = 0; i < size - 1; i++) {
      sb.append(toNice(sent.get(i))).append(tagSeparator).append(toNice(finalTags[i]));
      sb.append(' ');
    }
    return sb.toString();
  }


  ArrayList<TaggedWord> getTaggedSentence() {
    final boolean hasOffset;
    hasOffset = origWords != null && origWords.size() > 0 && (origWords.get(0) instanceof HasOffset);
    ArrayList<TaggedWord> taggedSentence = new ArrayList<>();
    for (int j = 0; j < size - 1; j++) {
      String tag = finalTags[j];
      TaggedWord w = new TaggedWord(sent.get(j), tag);
      if (hasOffset) {
        HasOffset offset = (HasOffset) origWords.get(j);
        w.setBeginPosition(offset.beginPosition());
        w.setEndPosition(offset.endPosition());
      }
      taggedSentence.add(w);
    }
    return taggedSentence;
  }

  static String toNice(String s) {
    if (s == null) {
      return naTag;
    } else {
      return s;
    }
  }

  /** calculateProbs puts log probs of taggings in the probabilities array.
   *
   *  @param probabilities Array with indices sent size, k best size, numTags
   */
  protected void calculateProbs(double[][][] probabilities) {
    ArrayUtils.fill(probabilities, Double.NEGATIVE_INFINITY);
    for (int hyp = 0; hyp < kBestSize; hyp++) {
      // put the whole thing in pairs, give its beginning and end
      pairs.setSize(size);
      for (int i = 0; i < size; i++) {
        pairs.setWord(i,sent.get(i));
        pairs.setTag(i,finalTags[i]);
        //pairs.add(new WordTag(sent.get(i),finalTags[i]));
        // TODO: if kBestSize > 1, use KBestSequenceFinder and save
        // k-best hypotheses into finalTags:
        //pairs.setTag(i,finalTags[i]);
      }
      int start = endSizePairs;
      int end = endSizePairs + size - 1;
      endSizePairs = endSizePairs + size;
      // iterate over the sentence
      for (int current = 0; current < size; current++) {
        History h = new History(start, end, current + start, pairs, maxentTagger.extractors);
        String[] tags = stringTagsAt(h.current - h.start + leftWindow());
        double[] probs = getHistories(tags, h);
        ArrayMath.logNormalize(probs);

        // log.info("word: " + pairs.getWord(current));
        // log.info("tags: " + Arrays.asList(tags));
        // log.info("probs: " + ArrayMath.toString(probs));

        for (int j = 0; j < tags.length; j++) {
          // score the j-th tag
          String tag = tags[j];
          boolean approximate = maxentTagger.hasApproximateScoring();
          int tagindex = approximate ? maxentTagger.tags.getIndex(tag) : j;
          // log.info("Mapped from j="+ j + " " + tag + " to " + tagindex);
          probabilities[current][hyp][tagindex] = probs[j];
        }
      } // for current
    } // for hyp
    // clean up the stuff in PairsHolder (added by cdm in Aug 2008)
    revert(0);
  } // end calculateProbs()


  /** Write the tagging and note any errors (if pf != null) and accumulate
   *  global statistics.
   *
   *  @param finalTags Chosen tags for sentence
   *  @param pf File to write tagged output to (can be null, then no output;
   *               at present it is non-null iff the debug property is set)
   */
  protected void writeTagsAndErrors(String[] finalTags, PrintFile pf, boolean verboseResults) {
    StringWriter sw = new StringWriter(200);
    for (int i = 0; i < correctTags.length; i++) {
      sw.write(toNice(sent.get(i)));
      sw.write(tagSeparator);
      sw.write(finalTags[i]);
      sw.write(' ');
      if (pf != null) {
        pf.print(toNice(sent.get(i)));
        pf.print(tagSeparator);
        pf.print(finalTags[i]);
      }
      if ((correctTags[i]).equals(finalTags[i])) {
        numRight++;
      } else {
        numWrong++;
        if (pf != null) pf.print('|' + correctTags[i]);
        if (verboseResults) {
          EncodingPrintWriter.err.println((maxentTagger.dict.isUnknown(sent.get(i)) ? "Unk" : "") + "Word: " + sent.get(i) + "; correct: " + correctTags[i] + "; guessed: " + finalTags[i], encoding);
        }

        if (maxentTagger.dict.isUnknown(sent.get(i))) {
          numWrongUnknown++;
          if (pf != null) pf.print("*");
        }// if
      }// else
      if (pf != null) pf.print(' ');
    }// for
    if (pf != null) pf.println();

    if (verboseResults) {
      PrintWriter pw;
      try {
        pw = new PrintWriter(new OutputStreamWriter(System.out, encoding), true);
      } catch (UnsupportedEncodingException uee) {
        pw = new PrintWriter(new OutputStreamWriter(System.out), true);
      }
      pw.println(sw);
    }
  }

  /**
   * Update a confusion matrix with the errors from this sentence.
   *
   * @param finalTags Chosen tags for sentence
   * @param confusionMatrix Confusion matrix to write to
   */
  protected void updateConfusionMatrix(String[] finalTags,
                                       ConfusionMatrix<String> confusionMatrix) {
    for (int i = 0; i < correctTags.length; i++)
      confusionMatrix.add(finalTags[i], correctTags[i]);
  }


  /**
   * Test using (exact Viterbi) TagInference.
   *
   * @return The tagged sentence
   */
  private ArrayList<TaggedWord> testTagInference() {
    runTagInference();
    return getTaggedSentence();
  }

  private void runTagInference() {
    this.initializeScorer();
    if (Thread.interrupted()) {  // Allow interrupting
      throw new RuntimeInterruptedException();
    }

    BestSequenceFinder ti = new ExactBestSequenceFinder();
      //new BeamBestSequenceFinder(50);
      //new KBestSequenceFinder()
    int[] bestTags = ti.bestSequence(this);
    finalTags = new String[bestTags.length];
    for (int j = 0; j < size; j++) {
      finalTags[j] = maxentTagger.tags.getTag(bestTags[j + leftWindow()]);
    }

    if (Thread.interrupted()) {  // Allow interrupting
      throw new RuntimeInterruptedException();
    }
    cleanUpScorer();
  }


  // This is used for Dan's tag inference methods.
  // current is the actual word number + leftW
  private void setHistory(int current, History h, int[] tags) {
    //writes over the tags in the last thing in pairs

    int left = leftWindow();
    int right = rightWindow();

    for (int j = current - left; j <= current + right; j++) {
      if (j < left) {
        continue;
      } //but shouldn't happen
      if (j >= size + left) {
        break;
      } //but shouldn't happen
      h.setTag(j - left, maxentTagger.tags.getTag(tags[j]));
    }
  }

  // do initializations for the TagScorer interface
  protected void initializeScorer() {
    pairs.setSize(size);
    for (int i = 0; i < size; i++)
      pairs.setWord(i,sent.get(i));
    endSizePairs += size;
  }


  /**
   * clean-up after the scorer
   */
  protected void cleanUpScorer() {
    revert(0);
  }

  // This scores the current assignment in PairsHolder at
  // current position h.current (returns normalized scores)
  private double[] getScores(History h) {
    if (maxentTagger.hasApproximateScoring()) {
      return getApproximateScores(h);
    }
    return getExactScores(h);
  }

  private double[] getExactScores(History h) {
    String[] tags = stringTagsAt(h.current - h.start + leftWindow());
    double[] histories = getHistories(tags, h); // log score for each tag
    ArrayMath.logNormalize(histories);
    double[] scores = new double[tags.length];
    for (int j = 0; j < tags.length; j++) {
      // score the j-th tag
      String tag = tags[j];
      int tagindex = maxentTagger.tags.getIndex(tag);
      scores[j] = histories[tagindex];
    }
    return scores;
  }

  // In this method, each tag that is incompatible with the current word
  // (e.g., apple_CC) gets a default (constant) score instead of its exact score.
  // The scores of all other tags are computed exactly.
  private double[] getApproximateScores(History h) {
    String[] tags = stringTagsAt(h.current - h.start + leftWindow());
    double[] scores = getHistories(tags, h); // log score for each active tag, unnormalized

    // Number of tags that get assigned a default score:
    int nDefault = maxentTagger.ySize - tags.length;
    double logScore = ArrayMath.logSum(scores);
    double logScoreInactiveTags = maxentTagger.getInactiveTagDefaultScore(nDefault);
    double logTotal = SloppyMath.logAdd(logScore, logScoreInactiveTags);
    ArrayMath.addInPlace(scores, -logTotal);

    return scores;
  }

  // This precomputes scores of local features (localScores).
  protected double[] getHistories(String[] tags, History h) {
    boolean rare = maxentTagger.isRare(ExtractorFrames.cWord.extract(h));
    Extractors ex = maxentTagger.extractors, exR = maxentTagger.extractorsRare;
    String w = pairs.getWord(h.current);
    double[] lS, lcS;
    lS = localScores.get(w);
    if (lS == null) {
      lS = getHistories(tags, h, ex.local, rare ? exR.local : null);
      localScores.put(w,lS);
    } else if (lS.length != tags.length) {
      // This case can occur when a word was given a specific forced
      // tag, and then later it shows up without the forced tag.
      // TODO: if a word is given a forced tag, we should always get
      // its features rather than use the cache, just in case the tag
      // given is not the same tag as before
      lS = getHistories(tags, h, ex.local, rare ? exR.local : null);
      if (tags.length > 1) {
        localScores.put(w,lS);
      }
    }
    if((lcS = localContextScores[h.current]) == null) {
      lcS = getHistories(tags, h, ex.localContext, rare ? exR.localContext : null);
      localContextScores[h.current] = lcS;
      ArrayMath.pairwiseAddInPlace(lcS,lS);
    }
    double[] totalS = getHistories(tags, h, ex.dynamic, rare ? exR.dynamic : null);
    ArrayMath.pairwiseAddInPlace(totalS,lcS);
    return totalS;
  }

  private double[] getHistories(String[] tags, History h, List<Pair<Integer,Extractor>> extractors, List<Pair<Integer,Extractor>> extractorsRare) {
    if(maxentTagger.hasApproximateScoring())
      return getApproximateHistories(tags, h, extractors, extractorsRare);
    return getExactHistories(h, extractors, extractorsRare);
  }

  private double[] getExactHistories(History h, List<Pair<Integer,Extractor>> extractors, List<Pair<Integer,Extractor>> extractorsRare) {
    double[] scores = new double[maxentTagger.ySize];
    int szCommon = maxentTagger.extractors.size();

    for (Pair<Integer,Extractor> e : extractors) {
      int kf = e.first();
      Extractor ex = e.second();
      String val = ex.extract(h);
      int[] fAssociations = maxentTagger.fAssociations.get(kf).get(val);
      if (fAssociations != null) {
        for (int i = 0; i < maxentTagger.ySize; i++) {
          int fNum = fAssociations[i];
          if (fNum > -1) {
            scores[i] += maxentTagger.getLambdaSolve().lambda[fNum];
          }
        }
      }
    }
    if (extractorsRare != null) {
      for (Pair<Integer,Extractor> e : extractorsRare) {
        int kf = e.first();
        Extractor ex = e.second();
        String val = ex.extract(h);
        int[] fAssociations = maxentTagger.fAssociations.get(kf+szCommon).get(val);
        if (fAssociations != null) {
          for (int i = 0; i < maxentTagger.ySize; i++) {
            int fNum = fAssociations[i];
            if (fNum > -1) {
              scores[i] += maxentTagger.getLambdaSolve().lambda[fNum];
            }
          }
        }
      }
    }
    return scores;
  }

  // todo [cdm 2016]: Could this be sped up a bit by caching lambda array, extracting method for shared code?
  // todo [cdm 2016]: Also it's allocating java.util.ArrayList$Itr for for loop - why can't it just random access array?
  /** Returns an unnormalized score (in log space) for each tag. */
  private double[] getApproximateHistories(String[] tags, History h, List<Pair<Integer,Extractor>> extractors, List<Pair<Integer,Extractor>> extractorsRare) {

    double[] scores = new double[tags.length];
    int szCommon = maxentTagger.extractors.size();

    for (Pair<Integer,Extractor> e : extractors) {
      int kf = e.first();
      Extractor ex = e.second();
      String val = ex.extract(h);
      int[] fAssociations = maxentTagger.fAssociations.get(kf).get(val);
      if (fAssociations != null) {
        for (int j = 0; j < tags.length; j++) {
          String tag = tags[j];
          int tagIndex = maxentTagger.tags.getIndex(tag);
          int fNum = fAssociations[tagIndex];
          if (fNum > -1) {
            scores[j] += maxentTagger.getLambdaSolve().lambda[fNum];
          }
        }
      }
    }
    if (extractorsRare != null) {
      for (Pair<Integer,Extractor> e : extractorsRare) {
        int kf = e.first();
        Extractor ex = e.second();
        String val = ex.extract(h);
        int[] fAssociations = maxentTagger.fAssociations.get(szCommon+kf).get(val);
        if (fAssociations != null) {
          for (int j = 0; j < tags.length; j++) {
            String tag = tags[j];
            int tagIndex = maxentTagger.tags.getIndex(tag);
            int fNum = fAssociations[tagIndex];
            if (fNum > -1) {
              scores[j] += maxentTagger.getLambdaSolve().lambda[fNum];
            }
          }
        }
      }
    }
    return scores;
  }


  /**
   * This method should be called after the sentence has been tagged.
   * For every unknown word, this method prints the 3 most probable tags
   * to the file pfu.
   *
   * @param numSent The sentence number
   * @param pfu The file to print the probable tags to
   */
  void printUnknown(int numSent, PrintFile pfu) {
    NumberFormat nf = new DecimalFormat("0.0000");
    int numTags = maxentTagger.numTags();
    double[][][] probabilities = new double[size][kBestSize][numTags];
    calculateProbs(probabilities);
    for (int current = 0; current < size; current++) {
      if (maxentTagger.dict.isUnknown(sent.get(current))) {
        pfu.print(sent.get(current));
        pfu.print(':');
        pfu.print(numSent);
        double[] probs = new double[3];
        String[] tag3 = new String[3];
        getTop3(probabilities, current, probs, tag3);
        for (int i = 0; i < 3; i++) {
          if (probs[i] > Double.NEGATIVE_INFINITY) {
            pfu.print('\t');
            pfu.print(tag3[i]);
            pfu.print(' ');
            pfu.print(nf.format(Math.exp(probs[i])));
          }
        }
        int rank;
        String correctTag = toNice(this.correctTags[current]);
        for (rank = 0; rank < 3; rank++) {
          if (correctTag.equals(tag3[rank])) {
            break;
          } //if
        }
        pfu.print('\t');
        switch (rank) {
          case 0:
            pfu.print("Correct");
            break;
          case 1:
            pfu.print("2nd");
            break;
          case 2:
            pfu.print("3rd");
            break;
          default:
            pfu.print("Not top 3");
        }
        pfu.println();
      }// if
    }// for
  }

  // This method should be called after a sentence has been tagged.
  // For every word token, this method prints the 3 most probable tags
  // to the file pfu except for
  void printTop(PrintFile pfu) {
    NumberFormat nf = new DecimalFormat("0.0000");
    int numTags = maxentTagger.numTags();
    double[][][] probabilities = new double[size][kBestSize][numTags];
    calculateProbs(probabilities);

    for (int current = 0; current < correctTags.length; current++) {
      pfu.print(sent.get(current));
      double[] probs = new double[3];
      String[] tag3 = new String[3];
      getTop3(probabilities, current, probs, tag3);
      for (int i = 0; i < 3; i++) {
        if (probs[i] > Double.NEGATIVE_INFINITY) {
          pfu.print('\t');
          pfu.print(tag3[i]);
          pfu.print(' ');
          pfu.print(nf.format(Math.exp(probs[i])));
        }
      }
      int rank;
      String correctTag = toNice(this.correctTags[current]);
      for (rank = 0; rank < 3; rank++) {
        if (correctTag.equals(tag3[rank])) {
          break;
        } //if
      }
      pfu.print('\t');
      switch (rank) {
      case 0:
        pfu.print("Correct");
        break;
      case 1:
        pfu.print("2nd");
        break;
      case 2:
        pfu.print("3rd");
        break;
      default:
        pfu.print("Not top 3");
      }
      pfu.println();
    } // for
  }

  /** probs and tags should be passed in as arrays of size 3!
   *  If probs[i] == Double.NEGATIVE_INFINITY, then the entry should be ignored.
   */
  private void getTop3(double[][][] probabilities, int current, double[] probs, String[] tags) {
    int[] topIds = new int[3];
    double[] probTags = probabilities[current][0];
    Arrays.fill(probs, Double.NEGATIVE_INFINITY);
    for (int i = 0; i < probTags.length; i++) {
      if (probTags[i] > probs[0]) {
        probs[2] = probs[1];
        probs[1] = probs[0];
        probs[0] = probTags[i];
        topIds[2] = topIds[1];
        topIds[1] = topIds[0];
        topIds[0] = i;
      } else if (probTags[i] > probs[1]) {
        probs[2] = probs[1];
        probs[1] = probTags[i];
        topIds[2] = topIds[1];
        topIds[1] = i;
      } else if (probTags[i] > probs[2]) {
        probs[2] = probTags[i];
        topIds[2] = i;
      }
    }
    for (int j = 0; j < 3; j++) {
      tags[j] = toNice(maxentTagger.tags.getTag(topIds[j]));
    }
  }

  /*
   * Implementation of the TagScorer interface follows
   */

  @Override
  public int length() {
    return sent.size();
  }

  @Override
  public int leftWindow() {
    return maxentTagger.leftContext; //hard-code for now
  }

  @Override
  public int rightWindow() {
    return maxentTagger.rightContext; //hard code for now
  }


  @Override
  public int[] getPossibleValues(int pos) {
    String[] arr1 = stringTagsAt(pos);
    int[] arr = new int[arr1.length];
    for (int i = 0; i < arr.length; i++) {
      arr[i] = maxentTagger.tags.getIndex(arr1[i]);
    }

    return arr;
  }

  @Override
  public double scoreOf(int[] tags, int pos) {
    double[] scores = scoresOf(tags, pos);
    double score = Double.NEGATIVE_INFINITY;
    int[] pv = getPossibleValues(pos);
    for (int i = 0; i < scores.length; i++) {
      if (pv[i] == tags[pos]) {
        score = scores[i];
      }
    }
    return score;
  }

  @Override
  public double scoreOf(int[] sequence) {
    throw new UnsupportedOperationException();
  }

  @Override
  public double[] scoresOf(int[] tags, int pos) {
    if (DBG) {
      log.info("scoresOf(): length of tags is " + tags.length + "; position is " + pos + "; endSizePairs = " + endSizePairs + "; size is " + size + "; leftWindow is " + leftWindow());
      log.info("  History h = new History(" + (endSizePairs - size) + ", " + (endSizePairs - 1) + ", " + (endSizePairs - size + pos - leftWindow()) + ")");
    }
    history.init(endSizePairs - size, endSizePairs - 1, endSizePairs - size + pos - leftWindow());
    setHistory(pos, history, tags);
    return getScores(history);
  }

  // todo [cdm 2013]: Tagging could be sped up quite a bit here if we cached int arrays of tags by index, not Strings
  protected String[] stringTagsAt(int pos) {
    if ((pos < leftWindow()) || (pos >= size + leftWindow())) {
      return naTagArr;
    }

    String[] arr1;
    if (originalTags != null && originalTags.get(pos - leftWindow()) != null) {
      arr1 = new String[1];
      arr1[0] = originalTags.get(pos - leftWindow());
      return arr1;
    }

    String word = sent.get(pos - leftWindow());
    if (maxentTagger.dict.isUnknown(word)) {
      Set<String> open = maxentTagger.tags.getOpenTags();  // todo: really want array of String or int here
      arr1 = open.toArray(new String[open.size()]);
    } else {
      arr1 = maxentTagger.dict.getTags(word);
    }
    arr1 = maxentTagger.tags.deterministicallyExpandTags(arr1);
    return arr1;
  }

}