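///**
// * NOTE: every line of this file is commented out, so the class below is
// * disabled and is not compiled.
// */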
//package edu.berkeley.nlp.PCFGLA;
//
//import java.util.Arrays;
//import java.util.List;
//
//import edu.berkeley.nlp.PCFGLA.SimpleLexicon.IntegerIndexer;
//import edu.berkeley.nlp.PCFGLA.smoothing.Smoother;
//import edu.berkeley.nlp.syntax.StateSet;
//import edu.berkeley.nlp.syntax.Tree;
//import fig.basic.Indexer;
//
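///**
// * Each word's tagging probability is the sum of the (word,tag) score and the
// * (signature,tag) score (presumably in log space: score() multiplies the two
// * stored scores). Words whose wordCounter entry exceeds knownWordCount are
// * scored with the (word,tag) score only.
// *
// * @author petrov
// */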
//public class HierarchicalCombinedLexicon extends HierarchicalLexicon{
// private static final long serialVersionUID = 1L;
// protected int knownWordCount;
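// /**
// * @param numSubStates passed on to the superclass constructor
// * @param knownWordCount words whose wordCounter entry exceeds this value are
// *        scored without the signature score
// */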
// public HierarchicalCombinedLexicon(short[] numSubStates, int knownWordCount) {
// super(numSubStates, 0);
// this.knownWordCount = knownWordCount;
// }
//
// public HierarchicalCombinedLexicon(short[] numSubStates, int smoothingCutoff, double[] smoothParam,
// Smoother smoother, StateSetTreeList trainTrees, int knownWordCount) {
// this(numSubStates, knownWordCount);
// init(trainTrees);
// }
//
//
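// /**
// * @param previousLexicon passed on to the superclass constructor
// * @param knownWordCount words whose wordCounter entry exceeds this value are
// *        scored without the signature score
// */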
// public HierarchicalCombinedLexicon(SimpleLexicon previousLexicon, int knownWordCount) {
// super(previousLexicon);
// this.knownWordCount = knownWordCount;
// }
//
// public HierarchicalCombinedLexicon newInstance() {
// return new HierarchicalCombinedLexicon(this.numSubStates,this.knownWordCount);
// }
//
//// public double[] score(String word, short tag, int loc, boolean noSmoothing, boolean isSignature) {
//// int globalWordIndex = wordIndexer.indexOf(word);
//// int globalSigIndex = wordIndexer.indexOf(getSignature(word, loc));
//// return score(globalWordIndex, globalSigIndex, tag, loc, noSmoothing, isSignature);
//// }
//
//
//
// public double[] score(StateSet stateSet, short tag, boolean noSmoothing, boolean isSignature) {
//// String sig = getSignature(stateSet.getWord(), stateSet.from);
//// if (stateSet.sigIndex != wordIndexer.indexOf(sig));
//// System.out.println("problem, signatures dont match!");
// if (stateSet.wordIndex == -2) {
// String word = stateSet.getWord();
// stateSet.wordIndex = (short)wordIndexer.indexOf(word);
// stateSet.sigIndex = (short)wordIndexer.indexOf(getSignature(word,stateSet.from));
// }
// return score(stateSet.wordIndex, stateSet.sigIndex, tag, stateSet.from, noSmoothing, isSignature);
// }
//
//
// public double[] score(int globalWordIndex, int globalSigIndex, short tag, int loc, boolean noSmoothing, boolean isSignature) {
// double[] res = new double[numSubStates[tag]];
// if (globalWordIndex!=-1) {
// int tagSpecificWordIndex = tagWordIndexer[tag].indexOf(globalWordIndex);
// if (tagSpecificWordIndex!=-1){
// for (int i=0; i<numSubStates[tag]; i++){
// res[i] = scores[tag][i][tagSpecificWordIndex];
// }
// }
// else {
// Arrays.fill(res, 1.0);
// }
// } else {
// Arrays.fill(res, 1.0);
// }
// if (globalWordIndex>=0 && wordCounter[globalWordIndex]>knownWordCount) return res;
// if (globalSigIndex!=-1) {
// int tagSpecificWordIndex = tagWordIndexer[tag].indexOf(globalSigIndex);
// if (tagSpecificWordIndex!=-1){
// for (int i=0; i<numSubStates[tag]; i++){
// res[i] *= scores[tag][i][tagSpecificWordIndex];
// }
// }
// } else{
// System.out.println("unseen sig");
// }
// if (smoother!=null) smoother.smooth(tag,res);
// return res;
// }
//
// public double[] scoreWord(StateSet stateSet, int tag){
// return scoreWord(stateSet.wordIndex, tag);
// }
//
// public double[] scoreWord(String word, int tag) {
// int globalWordIndex = wordIndexer.indexOf(word);
// return scoreWord(globalWordIndex, tag);
// }
//
// public double[] scoreWord(int globalWordIndex, int tag){
// double[] res = new double[numSubStates[tag]];
// if (globalWordIndex!=-1) {
// int tagSpecificWordIndex = tagWordIndexer[tag].indexOf(globalWordIndex);
// if (tagSpecificWordIndex!=-1){
// for (int i=0; i<numSubStates[tag]; i++){
// res[i] = scores[tag][i][tagSpecificWordIndex];
// }
// }
// } else {
// Arrays.fill(res, 1.0);
// }
// return res;
// }
//
//
// public double[] scoreSignature(StateSet stateSet, int tag) {
// return scoreSignature(stateSet.wordIndex, stateSet.sigIndex, tag);
// }
//
//
// public double[] scoreSignature(String word, String sig, int tag) {
// int globalWordIndex = wordIndexer.indexOf(word);
// int globalSigIndex = wordIndexer.indexOf(sig);
// return scoreSignature(globalWordIndex, globalSigIndex, tag);
// }
//
// public double[] scoreSignature(int globalWordIndex, int globalSigIndex, int tag) {
// if (globalWordIndex>=0 && wordCounter[globalWordIndex]>knownWordCount) return null;
// double[] res = new double[numSubStates[tag]];
// if (globalSigIndex!=-1) {
// int tagSpecificWordIndex = tagWordIndexer[tag].indexOf(globalSigIndex);
// if (tagSpecificWordIndex!=-1){
// for (int i=0; i<numSubStates[tag]; i++){
// res[i] += scores[tag][i][tagSpecificWordIndex];
// }
// }
// } else{
// System.out.println("unseen sig");
// }
// return res;
// }
//
// public void labelTrees(StateSetTreeList trainTrees){
// for (Tree<StateSet> tree : trainTrees){
// List<StateSet> words = tree.getYield();
// List<StateSet> tags = tree.getPreTerminalYield();
// int ind = 0;
// for (StateSet word : words){
// word.wordIndex = (short)wordIndexer.indexOf(word.getWord());
// short tag = tags.get(ind).getState();
//// if (wordIsAmbiguous[word.wordIndex]) {
// String sig = getSignature(word.getWord(), ind);
// wordIndexer.add(sig);
// word.sigIndex = (short)wordIndexer.indexOf(sig);
// tagWordIndexer[tag].add(wordIndexer.indexOf(sig));
//// }
//// else { word.sigIndex = -1; }
// ind++;
// }
// }
//
// }
//
// public void init(StateSetTreeList trainTrees){
// for (Tree<StateSet> tree : trainTrees){
// List<StateSet> words = tree.getYield();
// List<StateSet> tags = tree.getPreTerminalYield();
// int ind = 0;
// for (StateSet word : words){
// String sig = word.getWord();
// wordIndexer.add(sig);
// tagWordIndexer[tags.get(ind).getState()].add(wordIndexer.indexOf(sig));
// word.wordIndex = (short)wordIndexer.indexOf(sig);
// ind++;
// }
// }
// wordCounter = new int[wordIndexer.size()];
// tagWordIndexer = new IntegerIndexer[numStates];
// for (int tag=0; tag<numStates; tag++){
// tagWordIndexer[tag] = new IntegerIndexer(wordIndexer.size());
// }
//// int[] firstTag = new int[wordIndexer.size()];
////// wordIsAmbiguous = new boolean[wordIndexer.size()];
//// for (Tree<StateSet> tree : trainTrees){
//// List<StateSet> words = tree.getYield();
//// List<StateSet> tags = tree.getPreTerminalYield();
//// int ind = 0;
//// for (StateSet word : words){
//// short tag = tags.get(ind).getState();
//// ind++;
//// if (firstTag[word.wordIndex]==0) firstTag[word.wordIndex] = tag;
//// else if (firstTag[word.wordIndex] != tag) {
////// wordIsAmbiguous[word.wordIndex] = true;
//// }
//// }
//// }
// labelTrees(trainTrees);
// expectedCounts = new double[numStates][][];
// scores = new double[numStates][][];
// for (int tag=0; tag<numStates; tag++){
// expectedCounts[tag] = new double[numSubStates[tag]][tagWordIndexer[tag].size()];
// scores[tag] = new double[numSubStates[tag]][tagWordIndexer[tag].size()];
// }
// nWords = wordIndexer.size();
// }
//
//
//
//}