LexTreeLinguist.java example

Explorer

sphinx4-master
- sphinx4-core
  - src
    - main
      - java
        edu
        cmu
        sphinx
        alignment
        LongTextAligner.java
        SimpleTokenizer.java
        TextTokenizer.java
        Token.java
        USEnglishTokenizer.java
        tokenizer
        CharTokenizer.java
        DecisionTree.java
        FeatureSet.java
        Item.java
        ItemContents.java
        NumberExpander.java
        PathExtractor.java
        PrefixFSM.java
        PronounceableFSM.java
        Relation.java
        SuffixFSM.java
        Utterance.java
        WordRelation.java
        api
        AbstractSpeechRecognizer.java
        Configuration.java
        Context.java
        LiveSpeechRecognizer.java
        Microphone.java
        SpeechAligner.java
        SpeechResult.java
        SpeechSourceProvider.java
        StreamSpeechRecognizer.java
        decoder
        AbstractDecoder.java
        Decoder.java
        FrameDecoder.java
        ResultListener.java
        ResultProducer.java
        adaptation
        ClusteredDensityFileData.java
        Stats.java
        Transform.java
        pruner
        NullPruner.java
        Pruner.java
        SimplePruner.java
        scorer
        AcousticScorer.java
        BackgroundModelNormalizer.java
        MaxScoreNormalizer.java
        ScoreNormalizer.java
        ScoreProvider.java
        Scoreable.java
        SimpleAcousticScorer.java
        ThreadedAcousticScorer.java
        search
        ActiveList.java
        ActiveListFactory.java
        ActiveListManager.java
        AlternateHypothesisManager.java
        PartitionActiveListFactory.java
        Partitioner.java
        SearchManager.java
        SimpleActiveListFactory.java
        SimpleActiveListManager.java
        SimpleBreadthFirstSearchManager.java
        SkewPruningSearchManager.java
        SortingActiveListFactory.java
        Token.java
        TokenHeapSearchManager.java
        TokenSearchManager.java
        WordActiveListFactory.java
        WordPruningBreadthFirstLookaheadSearchManager.java
        WordPruningBreadthFirstSearchManager.java
        stats
        StateHistoryTracker.java
        TokenTracker.java
        TokenTypeTracker.java
        WordTracker.java
        frontend
        AutoCepstrum.java
        BaseDataProcessor.java
        Data.java
        DataBlocker.java
        DataEndSignal.java
        DataProcessingException.java
        DataProcessor.java
        DataStartSignal.java
        DoubleData.java
        FloatData.java
        FrontEnd.java
        GainControlProcessor.java
        Signal.java
        SignalListener.java
        databranch
        DataBufferProcessor.java
        DataListener.java
        DataProducer.java
        FrontEndSplitter.java
        denoise
        Denoise.java
        endpoint
        AbstractVoiceActivityDetector.java
        SpeechClassifiedData.java
        SpeechClassifier.java
        SpeechEndSignal.java
        SpeechMarker.java
        SpeechStartSignal.java
        feature
        AbstractFeatureExtractor.java
        BatchAGC.java
        BatchCMN.java
        BatchVarNorm.java
        ConcatFeatureExtractor.java
        DeltasFeatureExtractor.java
        FeatureTransform.java
        FrameDropper.java
        KaldiDeltasFeatureExtractor.java
        LiveCMN.java
        S3FeatureExtractor.java
        filter
        Dither.java
        EnergyFilter.java
        Preemphasizer.java
        frequencywarp
        FrequencyWarper.java
        LinearPredictor.java
        MelFilter.java
        MelFilter2.java
        MelFrequencyFilterBank.java
        MelFrequencyFilterBank2.java
        PLPCepstrumProducer.java
        PLPFilter.java
        PLPFrequencyFilterBank.java
        transform
        DiscreteCosineTransform.java
        DiscreteCosineTransform2.java
        DiscreteFourierTransform.java
        KaldiDiscreteCosineTransform.java
        Lifter.java
        util
        AudioContinuityTester.java
        AudioFileDataSource.java
        AudioFileProcessListener.java
        ConcatAudioFileDataSource.java
        ConcatFileDataSource.java
        DataConverter.java
        DataDumper.java
        DataUtil.java
        EnergyPlotter.java
        FrontEndUtils.java
        Microphone.java
        StreamCepstrumSource.java
        StreamDataSource.java
        StreamHTKCepstrum.java
        Utterance.java
        VUMeter.java
        VUMeterMonitor.java
        VUMeterPanel.java
        WavWriter.java
        window
        RaisedCosineWindower.java
        fst
        Arc.java
        Convert.java
        Export.java
        Fst.java
        ImmutableFst.java
        ImmutableState.java
        Import.java
        State.java
        operations
        ArcSort.java
        Compose.java
        Connect.java
        Determinize.java
        ExtendFinal.java
        ILabelCompare.java
        NShortestPaths.java
        OLabelCompare.java
        Project.java
        ProjectType.java
        Reverse.java
        RmEpsilon.java
        semiring
        LogSemiring.java
        ProbabilitySemiring.java
        Semiring.java
        TropicalSemiring.java
        sequitur
        SequiturImport.java
        utils
        Pair.java
        Utils.java
        instrumentation
        AccuracyTracker.java
        BestPathAccuracyTracker.java
        ConfigMonitor.java
        MemoryTracker.java
        Monitor.java
        RecognizerMonitor.java
        RejectionTracker.java
        Resetable.java
        SpeedTracker.java
        jsgf
        GrXMLGrammar.java
        GrXMLHandler.java
        JSGFGrammar.java
        JSGFGrammarException.java
        JSGFGrammarParseException.java
        JSGFRuleGrammar.java
        JSGFRuleGrammarFactory.java
        JSGFRuleGrammarManager.java
        parser
        JSGFParser.java
        JSGFParserConstants.java
        JSGFParserTokenManager.java
        JavaCharStream.java
        ParseException.java
        Token.java
        TokenMgrError.java
        rule
        JSGFRule.java
        JSGFRuleAlternatives.java
        JSGFRuleCount.java
        JSGFRuleName.java
        JSGFRuleSequence.java
        JSGFRuleTag.java
        JSGFRuleToken.java
        linguist
        HMMSearchState.java
        Linguist.java
        LinguistProcessor.java
        SearchGraph.java
        SearchState.java
        SearchStateArc.java
        UnitSearchState.java
        WordSearchState.java
        WordSequence.java
        acoustic
        AcousticModel.java
        Context.java
        HMM.java
        HMMPool.java
        HMMPosition.java
        HMMState.java
        HMMStateArc.java
        LeftRightContext.java
        Unit.java
        UnitManager.java
        tiedstate
        CompositeSenone.java
        GaussianMixture.java
        GaussianWeights.java
        HMMManager.java
        HTK
        GMMDiag.java
        HMMSet.java
        HMMState.java
        Lab.java
        NamesConversion.java
        SingleHMM.java
        HTKLoader.java
        KaldiLoader.java
        LazyHmmManager.java
        Loader.java
        MixtureComponent.java
        Pool.java
        Saver.java
        ScoreCachingSenone.java
        Senone.java
        SenoneHMM.java
        SenoneHMMState.java
        SenoneSequence.java
        Sphinx3Loader.java
        TiedStateAcousticModel.java
        kaldi
        ConstantEventMap.java
        DiagGmm.java
        EventMap.java
        EventMapWithKey.java
        KaldiGmmPool.java
        KaldiTextParser.java
        SplitEventMap.java
        TableEventMap.java
        TransitionModel.java
        tiedmixture
        MixtureComponentSet.java
        MixtureComponentSetScores.java
        PrunableMixtureComponent.java
        SetBasedGaussianMixture.java
        trainer
        Buffer.java
        HMMPoolManager.java
        ModelInitializerLoader.java
        Sphinx3Saver.java
        TrainerAcousticModel.java
        TrainerScore.java
        trivial
        TrivialAcousticModel.java
        aflat
        AFlatLinguist.java
        PhoneLoop.java
        allphone
        AllphoneLinguist.java
        AllphoneSearchGraph.java
        PhoneHmmSearchState.java
        PhoneNonEmittingSearchState.java
        PhoneWordSearchState.java
        dflat
        DynamicFlatLinguist.java
        OutOfGrammarGraph.java
        dictionary
        Dictionary.java
        MappingDictionary.java
        Pronunciation.java
        TextDictionary.java
        Word.java
        flat
        AlternativeState.java
        BranchState.java
        CIPhoneLoop.java
        ExtendedUnitState.java
        FlatLinguist.java
        GrammarPoint.java
        GrammarState.java
        HMMStateState.java
        NonEmittingHMMState.java
        PronunciationState.java
        SentenceHMMState.java
        SentenceHMMStateArc.java
        SentenceHMMStateVisitor.java
        UnitState.java
        WordState.java
        g2p
        G2PConverter.java
        Path.java
        PathComparator.java
        language
        classes
        ClassBasedLanguageModel.java
        ClassMap.java
        ClassPresentingDictionary.java
        ClassProbability.java
        grammar
        AlignerGrammar.java
        BatchForcedAlignerGrammar.java
        FSTGrammar.java
        ForcedAlignerGrammar.java
        Grammar.java
        GrammarArc.java
        GrammarInterface.java
        GrammarNode.java
        LMGrammar.java
        LatticeGrammar.java
        SimpleWordListGrammar.java
        ngram
        DynamicTrigramModel.java
        InterpolatedLanguageModel.java
        KeywordOptimizerModel.java
        LanguageModel.java
        NetworkLanguageModel.java
        SimpleNGramModel.java
        large
        BinaryLoader.java
        BinaryStreamLoader.java
        KeywordOptimizerLargeNGramModel.java
        LargeNGramModel.java
        LargeTrigramModel.java
        NGramBuffer.java
        NGramProbability.java
        NMaxGramBuffer.java
        UnigramProbability.java
        trie
        BinaryLoader.java
        NgramTrie.java
        NgramTrieBitarr.java
        NgramTrieModel.java
        NgramTrieQuant.java
        lextree
        HMMTree.java
        LexTreeLinguist.java
        util
        GDLDumper.java
        LRUCache.java
        LinguistDumper.java
        LinguistStats.java
        LinguistTimer.java
        recognizer
        Recognizer.java
        StateListener.java
        result
        BoundedPriorityQueue.java
        Edge.java
        FrameStatistics.java
        GDLLatticeFactory.java
        Lattice.java
        LatticeOptimizer.java
        LatticeRescorer.java
        Nbest.java
        Node.java
        Path.java
        Result.java
        TokenGraphDumper.java
        WordResult.java
        speakerid
        Segment.java
        SpeakerCluster.java
        SpeakerIdentification.java
        Tester.java
        tools
        aligner
        Aligner.java
        audio
        AudioData.java
        AudioDataInputStream.java
        AudioPanel.java
        AudioPlayer.java
        AudioTool.java
        CepstrumPanel.java
        Downsampler.java
        FilenameDialog.java
        RawReader.java
        RawRecorder.java
        RawWriter.java
        Sinusoid.java
        SpectrogramPanel.java
        Utils.java
        bandwidth
        BandDetector.java
        batch
        BatchDecoderRecognizer.java
        BatchForcedAlignerRecognizer.java
        BatchModeRecognizer.java
        BatchNISTRecognizer.java
        SphinxShell.java
        endpoint
        Segmenter.java
        feature
        FeatureFileDumper.java
        live
        LiveModeRecognizer.java
        transcriber
        Transcriber.java
        trainer
        BaumWelchLearner.java
        BuildTranscriptHMM.java
        ControlFile.java
        Edge.java
        FlatInitializerLearner.java
        Graph.java
        Learner.java
        Node.java
        NodeType.java
        SimpleControlFile.java
        SimpleTrainManager.java
        SimpleTranscript.java
        SimpleUtterance.java
        Stage.java
        TrainManager.java
        Trainer.java
        TrainerDictionary.java
        TrainerLink.java
        TrainerToken.java
        Transcript.java
        TranscriptGraph.java
        TranscriptHMMGraph.java
        Utterance.java
        UtteranceGraph.java
        UtteranceHMMGraph.java
        util
        BatchFile.java
        BatchItem.java
        BatchManager.java
        Cache.java
        CommandInterface.java
        CommandInterpreter.java
        Complex.java
        CustomThreadFactory.java
        ExtendedStreamTokenizer.java
        FileReferenceSource.java
        GapInsertionDetector.java
        LogMath.java
        MatrixUtils.java
        NISTAlign.java
        PooledBatchManager.java
        Range.java
        ReferenceSource.java
        SimpleBatchManager.java
        SocketCommandClient.java
        SocketCommandInterpreter.java
        SphinxHelp.java
        SphinxLogFormatter.java
        StatisticsVariable.java
        StreamFactory.java
        TimeFrame.java
        Timer.java
        TimerPool.java
        Utilities.java
        machlearn
        OVector.java
        props
        ConfCategory.java
        ConfigHandler.java
        Configurable.java
        ConfigurableAdapter.java
        ConfigurationChangeListener.java
        ConfigurationManager.java
        ConfigurationManagerUtils.java
        InternalConfigurationException.java
        PropertyException.java
        PropertySheet.java
        PropertyType.java
        RawPropertyData.java
        S4Boolean.java
        S4Component.java
        S4ComponentList.java
        S4Double.java
        S4Integer.java
        S4PropWrapper.java
        S4Property.java
        S4String.java
        S4StringList.java
        SaxLoader.java
        tools
        GDLDumper.java
        HTMLDumper.java
    - test
      - java
        edu
        cmu
        sphinx
        alignment
        SpeechAlignerTest.java
        TextAlignerLargeTest.java
        TextAlignerSmallTest.java
        UsEnglishWordExpanderTest.java
        api
        LiveRecognizerTest.java
        decoder
        scorer
        ScorerTests.java
        search
        test
        PartitionerTest.java
        frontend
        AudioDataSourcesTest.java
        DataBlockerTest.java
        FrontendElementTest.java
        RandomDataProcessor.java
        RandomSampleRepeater.java
        SpeechMarkerTest.java
        fst
        ArcSortTest.java
        ComposeEpsilonTest.java
        ComposeTest.java
        ConnectTest.java
        DeterminizeTest.java
        ImportTest.java
        NShortestPathsTest.java
        ProjectTest.java
        ReverseTest.java
        RmEpsilonTest.java
        linguist
        WordSequenceTest.java
        acoustic
        tiedstate
        test
        MixtureComponentTest.java
        dictionary
        DictionaryTest.java
        language
        grammar
        BatchForcedAlignerGrammarTest.java
        FSTGrammarTest.java
        ngram
        DynamicTrigramModelTest.java
        large
        LargeNgramTest.java
        trie
        NgramTrieTest.java
        result
        LatticeCompTest.java
        LatticeIOTest.java
        PosteriorTest.java
        tools
        bandwidth
        BandDetectorTest.java
        util
        props
        AnotherDummyFrontEnd.java
        AnotherDummyProcessor.java
        CMUTests.java
        ComponentListTests.java
        ComponentPropertyTest.java
        ConfigurationManagerTest.java
        DummyComp.java
        DummyFrontEnd.java
        DummyFrontEndProcessor.java
        DummyProcessor.java
        TestConfigurable.java
- sphinx4-samples
  - src
    - main
      - java
        edu
        cmu
        sphinx
        demo
        DemoRunner.java
        aligner
        AlignerDemo.java
        allphone
        AllphoneDemo.java
        dialog
        DialogDemo.java
        speakerid
        SpeakerIdentificationDemo.java
        transcriber
        TranscriberDemo.java

/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.linguist.lextree;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;

import edu.cmu.sphinx.decoder.scorer.ScoreProvider;
import edu.cmu.sphinx.frontend.Data;
import edu.cmu.sphinx.linguist.HMMSearchState;
import edu.cmu.sphinx.linguist.Linguist;
import edu.cmu.sphinx.linguist.SearchGraph;
import edu.cmu.sphinx.linguist.SearchState;
import edu.cmu.sphinx.linguist.SearchStateArc;
import edu.cmu.sphinx.linguist.UnitSearchState;
import edu.cmu.sphinx.linguist.WordSearchState;
import edu.cmu.sphinx.linguist.WordSequence;
import edu.cmu.sphinx.linguist.acoustic.AcousticModel;
import edu.cmu.sphinx.linguist.acoustic.HMM;
import edu.cmu.sphinx.linguist.acoustic.HMMPool;
import edu.cmu.sphinx.linguist.acoustic.HMMState;
import edu.cmu.sphinx.linguist.acoustic.HMMStateArc;
import edu.cmu.sphinx.linguist.acoustic.Unit;
import edu.cmu.sphinx.linguist.acoustic.UnitManager;
import edu.cmu.sphinx.linguist.dictionary.Dictionary;
import edu.cmu.sphinx.linguist.dictionary.Pronunciation;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.linguist.language.grammar.Grammar;
import edu.cmu.sphinx.linguist.language.ngram.LanguageModel;
import edu.cmu.sphinx.linguist.util.LRUCache;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.TimerPool;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Boolean;
import edu.cmu.sphinx.util.props.S4Component;
import edu.cmu.sphinx.util.props.S4Double;
import edu.cmu.sphinx.util.props.S4Integer;

/**
 * A linguist that can represent large vocabularies efficiently. This class implements the Linguist interface. The main
 * role of any linguist is to represent the search space for the decoder. The initial state in the search space can be
 * retrieved by a SearchManager via a call to <code>getInitialSearchState</code>. This method returns a SearchState.
 * Successor states can be retrieved via calls to <code>SearchState.getSuccessors()</code>. There are a number of
 * search state sub-interfaces that are used to indicate different types of states in the search space:
 * <ul> <li><b>WordSearchState </b>- represents a word in the search space. <li><b>UnitSearchState </b>- represents a
 * unit in the search space <li><b>HMMSearchState </b> represents an HMM state in the search space </ul>
 * A linguist has a great deal of latitude about the order in which it returns states. For instance a 'flat' linguist
 * may return a WordState at the beginning of a word, while a 'tree' linguist may return WordStates at the ending of a
 * word. Likewise, a linguist may omit certain state types completely (such as a unit state). Some Search Managers may
 * want to know a priori the order in which states will be generated by the linguist. The method
 * <code>getSearchStateOrder</code> can be used to retrieve the order of state returned by the linguist.
 * <p>
 * Depending on the vocabulary size and topology, the search space represented by the linguist may include a very large
 * number of states. Some linguists will generate the search states dynamically, that is, the object representing a
 * particular state in the search space is not created until it is needed by the SearchManager. SearchManagers often
 * need to be able to determine if a particular state has been entered before by comparing states. Because SearchStates
 * may be generated dynamically, the <code>SearchState.equals()</code> call (as opposed to the reference equals '=='
 * method) should be used to determine if states are equal. The states returned by the linguist will generally provide
 * very efficient implementations of <code>equals</code> and <code>hashCode</code>. This will allow a SearchManager to
 * maintain collections of states in HashMaps efficiently.
 * <p>
 * <b>LexTreeLinguist Characteristics </b>
 * <p>
 * Some characteristics of this linguist: <ul> <li><b>Dynamic </b>- the linguist generates search states on the fly,
 * greatly reducing the required memory footprint <li><b>tree topology </b> this linguist represents the search space as
 * an inverted tree. Units near the roots of words are shared among many different words. This reduces the number of
 * states that need to be considered during the search. <li><b>HMM sharing </b>- because of state tying in the acoustic
 * models, it is often the case that triphone units that differ in the right context actually are represented by the
 * same HMM. This linguist recognizes this case and will use a single state to represent the HMM instead of two states.
 * This can greatly reduce the number of states generated by the linguist. <li><b>Small-footprint </b>- this linguist
 * uses a few other techniques to reduce the overall footprint of the search space. One technique that is particularly
 * helpful is to share the end word units (where the largest fanout of states occurs) across all of the words. For a 60K
 * word vocabulary, this can result in a reduction in tree nodes from about 2 million to around 3,000. <li><b>Quick
 * loading </b>- this linguist can compile the search space very quickly. A 60K word vocabulary can be made ready in
 * less than 10 seconds. </ul>
 * <p>
 * This linguist is not a general purpose linguist. It does impose some constraints:
 * <ul> <li><b>unit size </b>- this linguist only supports units that are no larger than triphones. <li><b>n-gram grammars </b>-
 * this linguist will generate the search space directly from the N-Gram language model. The vocabulary supported is the
 * intersection of the words found in the language model and the words that exist in the Dictionary. It is assumed that
 * all sequences of words in the vocabulary are valid. This linguist doesn't support arbitrary grammars. </ul>
 * <p>
 * <b>Design Notes </b> The following are some notes describing the design of this linguist. They may be helpful to
 * those who want to understand how this linguist works but are not necessary if you are only interested in using this
 * linguist.
 * <p>
 * <b>Search Space Representation </b> It has been shown that representing the search space as a tree can greatly reduce
 * the number of active states in a search since the units at the beginnings of words can be shared across multiple
 * words. For example, with a large vocabulary (60K words), at the end of a word, with a flat representation, we have to
 * provide transitions to the initial state of each possible word. That is 60K transitions. In a tree based system we
 * need to only provide transitions to each initial phone (within its context). That is about 1600 transitions. This is
 * a substantial reduction. Conceptually, this tree consists of a node for each possible initial unit. Each node can
 * have an arbitrary number of children which can be either unit nodes or word nodes.
 * <p>
 * This linguist uses the HMMTree class to build and represent the tree. The HMMTree is given the dictionary and
 * language model and builds the lextree. Instead of representing the nodes in the tree as phonemes and words as is
 * typically done, the HMMTree represents the tree as HMMs and words. The HMM is essentially a unit within its context.
 * This is typically a triphone (although for some units, such as SIL, it is a simple phone). Representing the nodes as
 * HMMs instead of phonemes yields a much larger tree, but also has some advantages:
 * <ul> <li>Because of state-tying in the acoustic models, many distinct triphones actually share an HMM. Representing
 * the nodes as HMMs allows these shared HMMs to be represented in the tree only once instead of many times if we
 * represented the nodes as phones or triphones. This leads to a reduction in the actual number of states that are
 * considered during a search. Experiments have shown that this can reduce the required beam by a factor of 2 or 3.
 * <li>By representing the nodes as HMM, we avoid having to lookup the HMM for a particular triphone during the search.
 * This is a modest savings. </ul>
 * There are some disadvantages in representing the tree with HMMs:
 * <ul> <li><b>size</b> since HMMs represent units in their context, we have many more copies of each node. For
 * instance, instead of having a single unit representing the initial 'd' in the word 'dog' we would have about 40 HMMs,
 * one for each possible left context. <li><b>speed </b> building the much larger HMM tree can take much more time,
 * since many more nodes are needed to represent the tree. <li><b>complexity </b> representing the tree with HMMs is
 * more complex. There are multiple entry points for each word/unit that have to be dealt with. </ul>
 * Luckily the size and speed issues can be mitigated (by adding a bit more complexity of course). The bulk of the nodes
 * in the HMM tree are the word ending nodes. There is a word ending node for each possible right context. To reduce
 * space, all of the word ending nodes are replaced by a single EndNode. During the search, the actual HMM nodes for a
 * particular EndNode are generated on request. These sets of HMM nodes can be shared among different word endings, and
 * therefore are cached. The effect of using this EndNode optimization is to reduce the space required by the tree by
 * about 300mb and the time required to generate the tree from about 60 seconds to about 6 seconds.
 *
 * <p>
 * <b>Word Histories </b>
 * <p>
 * We use explicit backoff for word histories. That technique has proven to be useful and reduces the number of
 * states. The reasoning is the following. With a vocabulary of size N, you have N^2 unique bigram
 * histories. So the token stack will have N^2*K unique tokens, where K is the number of states per token.
 * For a 100k vocab, 3 states per HMM, that will be 3*10^10 tokens (max). Of course, a large majority
 * of them will be pruned, but really, it's still way too much. If you stick with the <b>actual</b>  K-gram
 * used (i.e. accounting explicitly for backoff), then this number reduces <b>tremendously</b>.
 * Most bigrams don't have corresponding trigrams.  Not all 10^10 bigrams have trigrams. We only
 * need to store as many explicit tokens as the number of bigrams that have trigrams.
 */
public class LexTreeLinguist implements Linguist {

    /**
     * The property that defines the grammar to use when building the search graph.
     * NOTE(review): {@code newProperties} in this class does not read this property - confirm it is still needed.
     */
    @S4Component(type = Grammar.class)
    public final static String PROP_GRAMMAR = "grammar";

    /** The property that defines the acoustic model to use when building the search graph. */
    @S4Component(type = AcousticModel.class)
    public final static String PROP_ACOUSTIC_MODEL = "acousticModel";

    /** The property that defines the unit manager to use when building the search graph. */
    @S4Component(type = UnitManager.class, defaultClass = UnitManager.class)
    public final static String PROP_UNIT_MANAGER = "unitManager";

    /**
     * The property that determines whether or not full word histories are used to
     * determine when two states are equal.
     * NOTE(review): {@code newProperties} in this class does not read this property, and the explicit
     * constructor ignores its {@code fullWordHistories} argument - confirm whether the setting has any effect.
     */
    @S4Boolean(defaultValue = true)
    public final static String PROP_FULL_WORD_HISTORIES = "fullWordHistories";

    /** The property for the language model to be used by this linguist. */
    @S4Component(type = LanguageModel.class)
    public final static String PROP_LANGUAGE_MODEL = "languageModel";

    /** The property that defines the dictionary to use for this linguist. */
    @S4Component(type = Dictionary.class)
    public final static String PROP_DICTIONARY = "dictionary";

    /**
     * The property that defines the size of the arc cache. The cache is only created when the value is
     * greater than zero, so zero (the default) or any negative value disables it.
     */
    @S4Integer(defaultValue = 0)
    public final static String PROP_CACHE_SIZE = "cacheSize";

    /** The property that controls whether filler words are automatically added to the vocabulary. */
    @S4Boolean(defaultValue = false)
    public final static String PROP_ADD_FILLER_WORDS = "addFillerWords";

    /**
     * The property that controls whether or not the linguist generates unit states. When this property is false the
     * linguist may omit UnitSearchState states and go straight to the first HMM state of a unit. For some search
     * algorithms this allows for a faster search with more compact results.
     */
    @S4Boolean(defaultValue = false)
    public final static String PROP_GENERATE_UNIT_STATES = "generateUnitStates";

    /**
     * The property that determines whether or not unigram probabilities are
     * smeared through the lextree. During the expansion of the tree the
     * language probability can only be calculated once a word end node is reached.
     * Until that point the path has to be kept alive by giving it some language
     * probability. See
     *
     * Alleva, F., Huang, X. and Hwang, M.-Y., "Improvements on the pronunciation
     * prefix tree search organization", Proceedings of ICASSP, pp. 133-136,
     * Atlanta, GA, 1996.
     *
     * for a description of this technique.
     */
    @S4Boolean(defaultValue = true)
    public final static String PROP_WANT_UNIGRAM_SMEAR = "wantUnigramSmear";


    /** The property that determines the weight of the smear. See {@link LexTreeLinguist#PROP_WANT_UNIGRAM_SMEAR}. */
    @S4Double(defaultValue = 1.0)
    public final static String PROP_UNIGRAM_SMEAR_WEIGHT = "unigramSmearWeight";


    // Shared zero-length arc array.
    // NOTE(review): presumably returned by states that have no successors; its uses lie outside
    // this part of the file - confirm.
    private final static SearchStateArc[] EMPTY_ARC = new SearchStateArc[0];

    // ----------------------------------
    // Subcomponents that are configured
    // by the property sheet (or handed
    // to the explicit constructor)
    // -----------------------------------
    private LanguageModel languageModel;
    private AcousticModel acousticModel;
    private LogMath logMath;
    private Dictionary dictionary;
    private UnitManager unitManager;

    // ------------------------------------
    // Data that is configured by the
    // property sheet
    // ------------------------------------
    private Logger logger;
    protected boolean addFillerWords;
    // when false, unit arcs lead directly to the first HMM state of the unit
    private boolean generateUnitStates;
    private boolean wantUnigramSmear = true;
    private float unigramSmearWeight = 1.0f;
    // true only when maxArcCacheSize > 0; arcCache is created only in that case
    private boolean cacheEnabled;
    private int maxArcCacheSize;

    protected float languageWeight;
    // insertion probabilities, converted from the linear to the log domain at configuration time
    private float logWordInsertionProbability;
    private float logUnitInsertionProbability;
    private float logFillerInsertionProbability;
    private float logSilenceInsertionProbability;
    // NOTE(review): not assigned by the constructor or newProperties, so it keeps the float
    // default 0.0f unless set elsewhere; presumably log(1) - confirm.
    private float logOne;

    // ------------------------------------
    // Data used for building and maintaining
    // the search graph
    // -------------------------------------
    private Word sentenceEndWord;
    // single-element array holding the dictionary's sentence start word (filled in by compileGrammar)
    private Word[] sentenceStartWordArray;
    private SearchGraph searchGraph;
    private HMMPool hmmPool;
    // successor-arc cache keyed by state; stays null while caching is disabled
    private LRUCache<LexTreeState, SearchStateArc[]> arcCache;
    // maximum n-gram depth reported by the language model (set by compileGrammar)
    private int maxDepth;

    protected HMMTree hmmTree;

    // arc cache statistics, updated by LexTreeState.getCachedArcs()
    private int cacheTrys;
    private int cacheHits;

    /**
     * Creates a fully configured linguist without going through a property sheet.
     * <p>
     * The four insertion probabilities are given in the linear domain and are converted to the log
     * domain here. The arc cache is only created when <code>maxArcCacheSize</code> is greater than zero.
     *
     * @param acousticModel               the acoustic model used to build the search graph
     * @param unitManager                 the unit manager used to build the search graph
     * @param languageModel               the language model
     * @param dictionary                  the dictionary
     * @param fullWordHistories           accepted for compatibility; not used by this constructor
     * @param wantUnigramSmear            whether unigram probabilities are smeared through the tree
     * @param wordInsertionProbability    linear word insertion probability
     * @param silenceInsertionProbability linear silence insertion probability
     * @param fillerInsertionProbability  linear filler insertion probability
     * @param unitInsertionProbability    linear unit insertion probability
     * @param languageWeight              the weight applied to language model scores
     * @param addFillerWords              whether filler words are added to the vocabulary
     * @param generateUnitStates          whether explicit unit states are generated
     * @param unigramSmearWeight          the weight of the unigram smear
     * @param maxArcCacheSize             size of the arc cache; a non-positive value disables caching
     */
    public LexTreeLinguist(AcousticModel acousticModel, UnitManager unitManager,
            LanguageModel languageModel, Dictionary dictionary, boolean fullWordHistories, boolean wantUnigramSmear,
            double wordInsertionProbability, double silenceInsertionProbability, double fillerInsertionProbability,
            double unitInsertionProbability, float languageWeight, boolean addFillerWords, boolean generateUnitStates,
            float unigramSmearWeight, int maxArcCacheSize) {

        this.logger = Logger.getLogger(getClass().getName());
        this.logMath = LogMath.getLogMath();

        // collaborating components
        this.acousticModel = acousticModel;
        this.unitManager = unitManager;
        this.languageModel = languageModel;
        this.dictionary = dictionary;

        // plain settings
        this.wantUnigramSmear = wantUnigramSmear;
        this.unigramSmearWeight = unigramSmearWeight;
        this.languageWeight = languageWeight;
        this.addFillerWords = addFillerWords;
        this.generateUnitStates = generateUnitStates;

        // insertion probabilities are kept in the log domain
        this.logWordInsertionProbability = logMath.linearToLog(wordInsertionProbability);
        this.logSilenceInsertionProbability = logMath.linearToLog(silenceInsertionProbability);
        this.logFillerInsertionProbability = logMath.linearToLog(fillerInsertionProbability);
        this.logUnitInsertionProbability = logMath.linearToLog(unitInsertionProbability);

        // the arc cache exists only for a positive size
        this.maxArcCacheSize = maxArcCacheSize;
        this.cacheEnabled = maxArcCacheSize > 0;
        if (this.cacheEnabled) {
            this.arcCache = new LRUCache<LexTreeState, SearchStateArc[]>(maxArcCacheSize);
        }
    }

    /**
     * Creates an unconfigured linguist. Nothing is initialized here; presumably the instance is
     * configured afterwards through {@link #newProperties(PropertySheet)}.
     */
    public LexTreeLinguist() {

    }

    /**
     * Configures this linguist from a property sheet: fetches the logger and the sub-components,
     * reads the scalar settings, converts the insertion probabilities from the linear to the log
     * domain and creates the arc cache when the configured cache size is greater than zero.
     *
     * @param ps the property sheet to read the configuration from
     * @throws PropertyException if a property cannot be read
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    public void newProperties(PropertySheet ps) throws PropertyException {
        this.logger = ps.getLogger();
        this.logMath = LogMath.getLogMath();

        // sub-components
        this.acousticModel = (AcousticModel) ps.getComponent(PROP_ACOUSTIC_MODEL);
        this.unitManager = (UnitManager) ps.getComponent(PROP_UNIT_MANAGER);
        this.languageModel = (LanguageModel) ps.getComponent(PROP_LANGUAGE_MODEL);
        this.dictionary = (Dictionary) ps.getComponent(PROP_DICTIONARY);

        // scalar settings; insertion probabilities are stored in the log domain
        this.wantUnigramSmear = ps.getBoolean(PROP_WANT_UNIGRAM_SMEAR);
        this.logWordInsertionProbability =
                logMath.linearToLog(ps.getDouble(PROP_WORD_INSERTION_PROBABILITY));
        this.logSilenceInsertionProbability =
                logMath.linearToLog(ps.getDouble(PROP_SILENCE_INSERTION_PROBABILITY));
        this.logFillerInsertionProbability =
                logMath.linearToLog(ps.getDouble(PROP_FILLER_INSERTION_PROBABILITY));
        this.logUnitInsertionProbability =
                logMath.linearToLog(ps.getDouble(PROP_UNIT_INSERTION_PROBABILITY));
        this.languageWeight = ps.getFloat(PROP_LANGUAGE_WEIGHT);
        this.addFillerWords = ps.getBoolean(PROP_ADD_FILLER_WORDS);
        this.generateUnitStates = ps.getBoolean(PROP_GENERATE_UNIT_STATES);
        this.unigramSmearWeight = ps.getFloat(PROP_UNIGRAM_SMEAR_WEIGHT);

        // the arc cache exists only for a positive size
        this.maxArcCacheSize = ps.getInt(PROP_CACHE_SIZE);
        this.cacheEnabled = this.maxArcCacheSize > 0;
        if (this.cacheEnabled) {
            this.arcCache = new LRUCache<LexTreeState, SearchStateArc[]>(this.maxArcCacheSize);
        }
    }


    /**
     * Allocates the dictionary, the acoustic model and the language model (in that order) and then
     * compiles the grammar into the search graph.
     *
     * @throws IOException declared for the component allocations, which may raise it
     * @see edu.cmu.sphinx.linguist.Linguist#allocate()
     */
    public void allocate() throws IOException {
        dictionary.allocate();
        acousticModel.allocate();
        languageModel.allocate();
        // must run last: compiling reads from the dictionary and the language model
        compileGrammar();
    }


    /**
     * Deallocates the acoustic model, the dictionary and the language model, skipping any of them
     * that is <code>null</code>, and then drops the reference to the HMM tree.
     *
     * @throws IOException declared for the component deallocations, which may raise it
     * @see edu.cmu.sphinx.linguist.Linguist#deallocate()
     */
    public void deallocate() throws IOException {
        if (acousticModel != null) {
            acousticModel.deallocate();
        }
        if (dictionary != null) {
            dictionary.deallocate();
        }
        if (languageModel != null) {
            languageModel.deallocate();
        }
        hmmTree = null;
    }


    /**
     * Returns the search graph created by the most recent grammar compilation.
     *
     * @return the search graph; <code>null</code> if no graph has been assigned yet (the field is
     *         only assigned in compileGrammar within the visible part of this class)
     * @see edu.cmu.sphinx.linguist.Linguist#getSearchGraph()
     */
    public SearchGraph getSearchGraph() {
        return searchGraph;
    }


    /** Called before a recognition. This implementation does nothing. */
    public void startRecognition() {
    }

    /**
     * Called after a recognition. Notifies the language model that the utterance has ended.
     * <p>
     * The language model may be absent (see {@link #getLanguageModel()}); in that case there is
     * nothing to notify and the call is a no-op, consistent with the null handling in deallocate().
     */
    public void stopRecognition() {
        if (languageModel != null) {
            languageModel.onUtteranceEnd();
        }
    }


    /**
     * Retrieves the language model for this linguist.
     *
     * @return the language model, or <code>null</code> if none has been configured
     */
    public LanguageModel getLanguageModel() {
        return languageModel;
    }


    /**
     * Retrieves the dictionary for this linguist.
     *
     * @return the dictionary, or <code>null</code> if none has been configured
     */
    public Dictionary getDictionary() {
        return dictionary;
    }


    /**
     * Builds the initial search state from the initial word node of the HMM tree. The state's word
     * history is the sentence start word array trimmed to <code>maxDepth - 1</code> words.
     *
     * @return the initial search state
     * @throws RuntimeException if the HMM tree has no initial node
     */
    private SearchState getInitialSearchState() {
        final InitialWordNode startNode = hmmTree.getInitialNode();
        if (startNode == null) {
            throw new RuntimeException("Language model has no entry for initial word <s>");
        }

        return new LexTreeWordState(
                startNode,
                startNode.getParent(),
                new WordSequence(sentenceStartWordArray).trim(maxDepth - 1),
                0f,
                logOne,
                logOne);
    }


    /**
     * Compiles the n-gram into a lex tree that is used during the search.
     * <p>
     * Caches the sentence start/end words and the language model depth, builds the HMM tree and
     * finally creates the search graph around the initial search state. The "Compile" timer covers
     * everything up to and including tree generation; it is stopped in a <code>finally</code> block
     * so that it is not left running when one of those steps throws.
     */
    private void compileGrammar() {
        TimerPool.getTimer(this, "Compile").start();
        try {
            sentenceEndWord = dictionary.getSentenceEndWord();
            sentenceStartWordArray = new Word[1];
            sentenceStartWordArray[0] = dictionary.getSentenceStartWord();
            maxDepth = languageModel.getMaxDepth();

            generateHmmTree();
        } finally {
            // always pair the start() above with a stop(), even on failure
            TimerPool.getTimer(this, "Compile").stop();
        }

        // the graph is created outside the timed section, as before
        searchGraph = new LexTreeSearchGraph(getInitialSearchState());
    }


    /**
     * Creates the HMM pool from the acoustic model and unit manager, builds the HMM tree from that
     * pool, the dictionary and the language model, and then asks the pool to dump its info.
     */
    protected void generateHmmTree() {
        hmmPool = new HMMPool(acousticModel, logger, unitManager);
        hmmTree = new HMMTree(hmmPool, dictionary, languageModel,
                              addFillerWords, languageWeight);

        hmmPool.dumpInfo();
    }


    /** The search graph handed to the search manager; it only wraps the initial search state. */
    class LexTreeSearchGraph implements SearchGraph {

        /** The state at which the search starts. */

        private SearchState initialState;


        /**
         * Constructs a search graph with the given initial state
         *
         * @param initialState the initial state
         */
        LexTreeSearchGraph(SearchState initialState) {
            this.initialState = initialState;
        }


        /*
        * (non-Javadoc)
        *
        * @see edu.cmu.sphinx.linguist.SearchGraph#getInitialState()
        */
        public SearchState getInitialState() {
            return initialState;
        }


        /*
        * (non-Javadoc)
        *
        * Always returns 6.
        * NOTE(review): presumably the number of distinct getOrder() values used by the LexTree
        * state classes (the ones visible here return 3 and 4) - confirm.
        *
        * @see edu.cmu.sphinx.linguist.SearchGraph#getNumStateOrder()
        */
        public int getNumStateOrder() {
            return 6;
        }
        
        /*
        * (non-Javadoc)
        *
        * Always returns false.
        */
        public boolean getWordTokenFirst() {
            return false;
        }
    }

    /**
     * Common base for every search state that the LexTreeLinguist returns to the search manager. A state pairs a lex
     * tree node with the word history that led to it, and carries the smear term and smear probability it was
     * created with. The language and insertion probabilities default to <code>logOne</code>; subclasses override
     * the getters to supply real values.
     * <p>
     * This is an abstract class: subclasses must implement the {@link #getOrder()} method.
     */
    abstract class LexTreeState implements SearchState, SearchStateArc {

        private final Node node;
        private final WordSequence wordSequence;
        final float currentSmearTerm;
        final float currentSmearProb;


        /**
         * Creates a LexTreeState.
         *
         * @param node         the node associated with this state
         * @param wordSequence the history of words up until this point
         * @param smearTerm    the smear term carried by this state
         * @param smearProb    the smear probability carried by this state
         */
        LexTreeState(Node node, WordSequence wordSequence, float smearTerm,
                     float smearProb) {
            this.node = node;
            this.wordSequence = wordSequence;
            this.currentSmearTerm = smearTerm;
            this.currentSmearProb = smearProb;
        }


        /**
         * Builds a signature for this state from the node's hash code and the word history. Building the
         * signature is slow, so it should only be used for non-time-critical tasks such as plotting states.
         *
         * @return the signature
         */
        public String getSignature() {
            StringBuilder signature = new StringBuilder("lts-");
            signature.append(node.hashCode());
            signature.append("-ws-");
            signature.append(wordSequence);
            return signature.toString();
        }


        /**
         * Returns the smear term this state was created with
         *
         * @return the smear term
         */
        public float getSmearTerm() {
            return currentSmearTerm;
        }


        /**
         * Returns the smear probability this state was created with
         *
         * @return the smear probability
         */
        public float getSmearProb() {
            return currentSmearProb;
        }


        /**
         * Combines the hash codes of the word history and the node
         *
         * @return the hashcode
         */
        @Override
        public int hashCode() {
            return wordSequence.hashCode() * 37 + node.hashCode();
        }


        /**
         * Two states are equal when they refer to the very same node and have equal word histories
         *
         * @param o the object to test
         * @return <code>true</code> if the object is equal to this
         */
        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof LexTreeState)) {
                return false;
            }
            LexTreeState that = (LexTreeState) o;
            return node == that.node && wordSequence.equals(that.wordSequence);
        }


        /**
         * A state acts as its own arc, so the arc's target is this state
         *
         * @return this state
         */
        public SearchState getState() {
            return this;
        }


        /**
         * Gets the composite probability of entering this state: language plus insertion probability
         *
         * @return the log probability
         */
        public float getProbability() {
            final float language = getLanguageProbability();
            final float insertion = getInsertionProbability();
            return language + insertion;
        }


        /**
         * Gets the language probability of entering this state; <code>logOne</code> unless overridden
         *
         * @return the log probability
         */
        public float getLanguageProbability() {
            return logOne;
        }


        /**
         * Gets the insertion probability of entering this state; <code>logOne</code> unless overridden
         *
         * @return the log probability
         */
        public float getInsertionProbability() {
            return logOne;
        }


        /**
         * Determines if this is an emitting state
         *
         * @return <code>false</code> in this base implementation
         */
        public boolean isEmitting() {
            return false;
        }


        /**
         * Determines if this is a final state
         *
         * @return <code>false</code> in this base implementation
         */
        public boolean isFinal() {
            return false;
        }


        /**
         * Gets the lex tree node this state is attached to
         *
         * @return the node
         */
        protected Node getNode() {
            return node;
        }


        /**
         * Returns the word sequence for this state
         *
         * @return the word sequence
         */
        public WordSequence getWordHistory() {
            return wordSequence;
        }


        /**
         * Returns the lex tree node this state is attached to (same value as {@link #getNode()})
         *
         * @return the node
         */
        public Node getLexState() {
            return node;
        }


        /**
         * Returns the successor arcs of this state, consulting the arc cache first and storing freshly
         * generated arcs in it
         *
         * @return the successor arcs
         */
        public SearchStateArc[] getSuccessors() {
            SearchStateArc[] successors = getCachedArcs();
            if (successors != null) {
                return successors;
            }
            successors = getSuccessors(node);
            putCachedArcs(successors);
            return successors;
        }


        /**
         * Creates one arc per successor of the given node: a word arc for a WordNode, an end unit arc for an
         * EndNode and a unit arc for anything else
         *
         * @param theNode node to get successors
         * @return the successor arcs, in the order of the node's successors
         */
        protected SearchStateArc[] getSuccessors(Node theNode) {
            final Node[] children = theNode.getSuccessors();
            final SearchStateArc[] result = new SearchStateArc[children.length];
            for (int idx = 0; idx < children.length; idx++) {
                final Node child = children[idx];
                if (child instanceof WordNode) {
                    // the node of this state is the last unit of the word
                    result[idx] = createWordStateArc((WordNode) child,
                            (HMMNode) getNode(), this);
                } else if (child instanceof EndNode) {
                    result[idx] = createEndUnitArc((EndNode) child, this);
                } else {
                    result[idx] = createUnitStateArc((HMMNode) child, this);
                }
            }
            return result;
        }


        /**
         * Creates a word search state for the given word node. Filler words other than the sentence end word
         * keep the current word history and get no language score. For every other word the history is
         * extended, the weighted language model score is looked up and the smear probability applied so far
         * is subtracted from it.
         *
         * @param wordNode the wordNode
         * @param lastUnit last unit of the word
         * @param previous previous state
         * @return the search state for the wordNode
         */
        protected SearchStateArc createWordStateArc(WordNode wordNode,
                                                    HMMNode lastUnit, LexTreeState previous) {
            final Word word = wordNode.getWord();
            final float inheritedSmearTerm = previous.getSmearTerm();
            final boolean isSentenceEnd = word == sentenceEndWord;

            if (word.isFiller() && !isSentenceEnd) {
                return new LexTreeWordState(wordNode, lastUnit,
                        wordSequence,
                        inheritedSmearTerm, logOne, logOne);
            }

            final WordSequence extended = wordSequence.addWord(word, maxDepth);
            final float weightedScore = languageModel.getProbability(extended) * languageWeight;
            final float newSmearTerm = getSmearTermFromLanguageModel(extended);
            // subtract off the previously applied smear probability
            final float languageProbability = weightedScore - previous.getSmearProb();
            final WordSequence trimmed = extended.trim(maxDepth - 1);

            if (isSentenceEnd) {
                return new LexTreeEndWordState(wordNode, lastUnit,
                        trimmed,
                        newSmearTerm, logOne, languageProbability);
            }
            return new LexTreeWordState(wordNode, lastUnit,
                    trimmed,
                    newSmearTerm, logOne, languageProbability);
        }


        /**
         * Creates a unit search state for the given unit node, or the first HMM state of that unit when unit
         * states are not being generated
         *
         * @param hmmNode  the unit node
         * @param previous the previous state
         * @return the search state
         */
        SearchStateArc createUnitStateArc(HMMNode hmmNode, LexTreeState previous) {
            final float insertionProbability = calculateInsertionProbability(hmmNode);
            final float smearProbability = getUnigramSmear(hmmNode)
                    + previous.getSmearTerm();
            // only the change relative to the smear applied so far is charged
            final float languageProbability = smearProbability - previous.getSmearProb();

            if (generateUnitStates) {
                return new LexTreeUnitState(hmmNode, getWordHistory(),
                        previous.getSmearTerm(), smearProbability,
                        languageProbability, insertionProbability);
            }

            // no unit state wanted: go straight to the first hmm state of the unit
            final HMM hmm = hmmNode.getHMM();
            return new LexTreeHMMState(hmmNode, getWordHistory(),
                    previous.getSmearTerm(), smearProbability,
                    hmm.getInitialState(), languageProbability,
                    insertionProbability, null);
        }


        /**
         * Creates an end unit search state for the given end node
         *
         * @param endNode  the end node
         * @param previous the previous state
         * @return the search state
         */
        SearchStateArc createEndUnitArc(EndNode endNode, LexTreeState previous) {
            final float smearProbability = getUnigramSmear(endNode)
                    + previous.getSmearTerm();
            final float languageProbability = smearProbability - previous.getSmearProb();
            final float insertionProbability = calculateInsertionProbability(endNode);
            return new LexTreeEndUnitState(endNode, getWordHistory(),
                    previous.getSmearTerm(), smearProbability,
                    languageProbability, insertionProbability);
        }


        /**
         * Returns the string representation of this object: node, composite probability and word history
         *
         * @return the string representation
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("lt-");
            text.append(node).append(' ').append(getProbability());
            text.append('{').append(wordSequence).append('}');
            return text.toString();
        }


        /**
         * Returns a pretty version of the string representation for this object
         *
         * @return a pretty string (the same as {@link #toString()})
         */
        public String toPrettyString() {
            return toString();
        }


        /**
         * Gets the successor arcs for this state from the cache, updating the hit/try counters and printing
         * the hit statistics every 1000000 lookups
         *
         * @return the next set of arcs for this state, or null if none can be found or if caching is disabled.
         */
        SearchStateArc[] getCachedArcs() {
            if (!cacheEnabled) {
                return null;
            }

            final SearchStateArc[] cached = arcCache.get(this);
            if (cached != null) {
                cacheHits++;
            }
            cacheTrys++;
            if (cacheTrys % 1000000 == 0) {
                final float hitPercentage = ((float) cacheHits) / cacheTrys * 100f;
                System.out.println("Hits: " + cacheHits
                        + " of " + cacheTrys + ' ' + hitPercentage);
            }
            return cached;
        }


        /**
         * Puts the set of arcs into the cache; does nothing when caching is disabled
         *
         * @param arcs the arcs to cache.
         */
        void putCachedArcs(SearchStateArc[] arcs) {
            if (!cacheEnabled) {
                return;
            }
            arcCache.put(this, arcs);
        }


        /**
         * Returns the order value of this kind of state
         *
         * @return the order
         */
        public abstract int getOrder();
    }

    /** Represents a word-ending unit (an EndNode) in the search space */
    public class LexTreeEndUnitState extends LexTreeState
            implements UnitSearchState {

        float logLanguageProbability;
        float logInsertionProbability;


        /**
         * Constructs a LexTreeEndUnitState
         *
         * @param endNode              the end node associated with this state
         * @param wordSequence         the history of words
         * @param smearTerm            the smear term carried by this state
         * @param smearProb            the smear probability carried by this state
         * @param languageProbability  the log language probability of entering this state
         * @param insertionProbability the log insertion probability of entering this state
         */
        LexTreeEndUnitState(EndNode endNode, WordSequence wordSequence,
                            float smearTerm, float smearProb, float languageProbability,
                            float insertionProbability) {
            super(endNode, wordSequence, smearTerm, smearProb);
            this.logLanguageProbability = languageProbability;
            this.logInsertionProbability = insertionProbability;
        }


        /**
         * Returns the base unit associated with this state
         *
         * @return the base unit of the end node
         */
        public Unit getUnit() {
            final EndNode endNode = getEndNode();
            return endNode.getBaseUnit();
        }


        /**
         * Derives the hash code from the base state's hash code
         *
         * @return the hashcode
         */
        @Override
        public int hashCode() {
            final int base = super.hashCode();
            return base * 17 + 423;
        }


        /**
         * Gets the insertion probability of entering this state
         *
         * @return the log probability
         */
        @Override
        public float getInsertionProbability() {
            return logInsertionProbability;
        }


        /**
         * Gets the language probability of entering this state
         *
         * @return the log probability
         */
        @Override
        public float getLanguageProbability() {
            return logLanguageProbability;
        }


        /**
         * Equal to another end unit state with the same node and an equal word history
         *
         * @param o the object to test
         * @return <code>true</code> if the object is equal to this
         */
        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            return o instanceof LexTreeEndUnitState && super.equals(o);
        }


        /**
         * Returns the end node for this state
         *
         * @return the end node
         */
        private EndNode getEndNode() {
            return (EndNode) getNode();
        }


        /**
         * Returns the successor arcs of this state, one per HMM node obtained for the end node. Depending on
         * <code>generateUnitStates</code> each arc is a unit state or the first HMM state of the unit; either
         * way the arc remembers this state's node as its parent node. Results go through the arc cache.
         *
         * @return the successor arcs
         */
        @Override
        public SearchStateArc[] getSuccessors() {
            SearchStateArc[] successors = getCachedArcs();
            if (successors != null) {
                return successors;
            }

            final HMMNode[] hmmNodes = getHMMNodes(getEndNode());
            successors = new SearchStateArc[hmmNodes.length];

            int slot = 0;
            for (HMMNode hmmNode : hmmNodes) {
                if (generateUnitStates) {
                    successors[slot] = new LexTreeUnitState(hmmNode,
                            getWordHistory(), getSmearTerm(),
                            getSmearProb(), logOne, logOne,
                            this.getNode());
                } else {
                    final HMM hmm = hmmNode.getHMM();
                    successors[slot] = new LexTreeHMMState(hmmNode,
                            getWordHistory(), getSmearTerm(),
                            getSmearProb(), hmm.getInitialState(),
                            logOne, logOne, this.getNode());
                }
                slot++;
            }

            putCachedArcs(successors);
            return successors;
        }


        /**
         * Returns the base representation followed by " EndUnit"
         *
         * @return the string representation
         */
        @Override
        public String toString() {
            final String base = super.toString();
            return base + " EndUnit";
        }


        /**
         * Returns the order value of end unit states
         *
         * @return 3
         */
        @Override
        public int getOrder() {
            return 3;
        }
    }

    /** Represents a unit (an HMMNode) in the search space */
    public class LexTreeUnitState extends LexTreeState
            implements UnitSearchState {

        private float logInsertionProbability;
        private float logLanguageProbability;
        private Node parentNode;
        // lazily computed hash code; -1 means "not computed yet"
        private int hashCode = -1;


        /**
         * Constructs a LexTreeUnitState without a parent node
         *
         * @param hmmNode              the unit node associated with this state
         * @param wordSequence         the history of words
         * @param smearTerm            the smear term carried by this state
         * @param smearProb            the smear probability carried by this state
         * @param languageProbability  the log language probability of entering this state
         * @param insertionProbability the log insertion probability of entering this state
         */
        LexTreeUnitState(HMMNode hmmNode, WordSequence wordSequence,
                         float smearTerm, float smearProb, float languageProbability,
                         float insertionProbability) {
            this(hmmNode, wordSequence, smearTerm, smearProb,
                    languageProbability, insertionProbability, null);
        }


        /**
         * Constructs a LexTreeUnitState
         *
         * @param hmmNode              the unit node associated with this state
         * @param wordSequence         the history of words
         * @param smearTerm            the smear term carried by this state
         * @param smearProb            the smear probability carried by this state
         * @param languageProbability  the log language probability of entering this state
         * @param insertionProbability the log insertion probability of entering this state
         * @param parentNode           the parent node (may be <code>null</code>); it takes part in equality
         *                             and hashing
         */
        LexTreeUnitState(HMMNode hmmNode, WordSequence wordSequence,
                         float smearTerm, float smearProb, float languageProbability,
                         float insertionProbability, Node parentNode) {
            super(hmmNode, wordSequence, smearTerm, smearProb);
            this.parentNode = parentNode;
            this.logLanguageProbability = languageProbability;
            this.logInsertionProbability = insertionProbability;
        }


        /**
         * Returns the base unit associated with this state
         *
         * @return the base unit of the unit node
         */
        public Unit getUnit() {
            final HMMNode unitNode = getHMMNode();
            return unitNode.getBaseUnit();
        }


        /**
         * Returns the hash code, computing it on first use from the base state's hash code and, when present,
         * the parent node
         *
         * @return the hashcode
         */
        @Override
        public int hashCode() {
            if (hashCode != -1) {
                return hashCode;
            }
            int computed = super.hashCode() * 17 + 421;
            if (parentNode != null) {
                computed = computed * 432 + parentNode.hashCode();
            }
            hashCode = computed;
            return computed;
        }


        /**
         * Equal to another unit state with the very same parent node, the same node and an equal word history
         *
         * @param o the object to test
         * @return <code>true</code> if the object is equal to this
         */
        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof LexTreeUnitState)) {
                return false;
            }
            LexTreeUnitState that = (LexTreeUnitState) o;
            return parentNode == that.parentNode && super.equals(o);
        }


        /**
         * Returns the unit node for this state
         *
         * @return the unit node
         */
        private HMMNode getHMMNode() {
            return (HMMNode) getNode();
        }


        /**
         * Returns the single successor of this state: the initial HMM state of this unit, entered with
         * <code>logOne</code> language and insertion probabilities
         *
         * @return a one-element array of arcs
         */
        @Override
        public SearchStateArc[] getSuccessors() {
            final HMMNode unitNode = getHMMNode();
            final HMM hmm = unitNode.getHMM();
            final SearchStateArc firstHmmState = new LexTreeHMMState(unitNode,
                    getWordHistory(), getSmearTerm(), getSmearProb(),
                    hmm.getInitialState(), logOne, logOne, parentNode);
            return new SearchStateArc[] { firstHmmState };
        }


        /**
         * Returns the base representation followed by " unit"
         *
         * @return the string representation
         */
        @Override
        public String toString() {
            final String base = super.toString();
            return base + " unit";
        }


        /**
         * Gets the insertion probability of entering this state
         *
         * @return the log probability
         */
        @Override
        public float getInsertionProbability() {
            return logInsertionProbability;
        }


        /**
         * Gets the language probability of entering this state
         *
         * @return the log probability
         */
        @Override
        public float getLanguageProbability() {
            return logLanguageProbability;
        }


        /**
         * Returns the order value of unit states
         *
         * @return 4
         */
        @Override
        public int getOrder() {
            return 4;
        }
    }

    /**
     * A search-space state that pairs an HMM node with one particular state of
     * that node's HMM.
     */
    public class LexTreeHMMState extends LexTreeState implements HMMSearchState, ScoreProvider {

        private final HMMState hmmState;
        private float logLanguageProbability;
        private float logInsertionProbability;
        private final Node parentNode;
        // Lazily computed hash; -1 means "not computed yet".
        int hashCode = -1;


        /**
         * Builds a state for the given HMM node and HMM state.
         *
         * @param hmmNode              the HMM node, handed to the superclass
         * @param wordSequence         the word history, handed to the superclass
         * @param smearTerm            the smear term, handed to the superclass
         * @param smearProb            the smear probability, handed to the superclass
         * @param hmmState             the HMM state this search state stands for
         * @param languageProbability  the log language probability of entering this state
         * @param insertionProbability the log insertion probability of entering this state
         * @param parentNode           the parent node; may be null, in which case the
         *                             no-argument superclass successor lookup is used
         *                             when leaving the HMM
         */
        LexTreeHMMState(HMMNode hmmNode, WordSequence wordSequence,
                        float smearTerm, float smearProb, HMMState hmmState,
                        float languageProbability, float insertionProbability,
                        Node parentNode) {
            super(hmmNode, wordSequence, smearTerm, smearProb);
            this.hmmState = hmmState;
            this.logLanguageProbability = languageProbability;
            this.logInsertionProbability = insertionProbability;
            this.parentNode = parentNode;
        }


        /**
         * Builds the signature of this state: the superclass signature
         * followed by "-HMM-" and the value of the HMM state's getState().
         *
         * @return the signature
         */
        @Override
        public String getSignature() {
            StringBuilder signature = new StringBuilder();
            signature.append(super.getSignature());
            signature.append("-HMM-");
            signature.append(hmmState.getState());
            return signature.toString();
        }


        /**
         * Exposes the HMM state wrapped by this search state.
         *
         * @return the HMM state
         */
        public HMMState getHMMState() {
            return hmmState;
        }


        /**
         * Computes the hash code on first use and remembers it. The hash
         * combines the superclass hash, the HMM state's getState() value and,
         * when present, the parent node's hash.
         *
         * @return the hash code
         */
        @Override
        public int hashCode() {
            if (hashCode != -1) {
                return hashCode;
            }
            int hash = super.hashCode() * 29 + (hmmState.getState() + 1);
            if (parentNode != null) {
                hash = hash * 377 + parentNode.hashCode();
            }
            hashCode = hash;
            return hash;
        }


        /**
         * Compares this state with another object. Two states are equal when
         * they reference the same HMM state and the same parent node (identity
         * comparison) and the superclass also considers them equal.
         *
         * @param o the object to compare against
         * @return <code>true</code> if the object is equal to this state
         */
        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof LexTreeHMMState)) {
                return false;
            }
            LexTreeHMMState that = (LexTreeHMMState) o;
            return hmmState == that.hmmState
                    && parentNode == that.parentNode
                    && super.equals(o);
        }


        /**
         * Reports the language probability of entering this state.
         *
         * @return the log language probability
         */
        @Override
        public float getLanguageProbability() {
            return logLanguageProbability;
        }


        /**
         * Reports the insertion probability of entering this state.
         *
         * @return the log insertion probability
         */
        @Override
        public float getInsertionProbability() {
            return logInsertionProbability;
        }


        /**
         * Collects the successors of this state, consulting the arc cache
         * first and storing a freshly computed result back into it.
         *
         * For an exit state the superclass provides the successors. Otherwise
         * each arc of the HMM state yields one successor: an emitting target
         * becomes a LexTreeHMMState (or this very object for a self loop whose
         * probability matches), a non-emitting target becomes a
         * LexTreeNonEmittingHMMState.
         *
         * @return the successor arcs
         */
        @Override
        public SearchStateArc[] getSuccessors() {
            SearchStateArc[] cached = getCachedArcs();
            if (cached != null) {
                return cached;
            }

            SearchStateArc[] successors;
            if (hmmState.isExitState()) {
                // Leaving the HMM: the transition goes to a new unit or to
                // a word end, which the superclass knows how to build.
                successors = (parentNode == null)
                        ? super.getSuccessors()
                        : super.getSuccessors(parentNode);
            } else {
                // Still inside the HMM: follow its own transitions.
                HMMStateArc[] hmmArcs = hmmState.getSuccessors();
                successors = new SearchStateArc[hmmArcs.length];
                for (int k = 0; k < hmmArcs.length; k++) {
                    HMMState target = hmmArcs[k].getHMMState();
                    float arcLogProbability = hmmArcs[k].getLogProbability();

                    if (!target.isEmitting()) {
                        successors[k] = new LexTreeNonEmittingHMMState(
                                (HMMNode) getNode(), getWordHistory(),
                                getSmearTerm(), getSmearProb(),
                                target, arcLogProbability, parentNode);
                    } else if (target == hmmState
                            && logInsertionProbability == arcLogProbability) {
                        // Self loop with an identical probability: this
                        // object already describes the successor.
                        successors[k] = this;
                    } else {
                        successors[k] = new LexTreeHMMState(
                                (HMMNode) getNode(), getWordHistory(),
                                getSmearTerm(), getSmearProb(),
                                target, logOne, arcLogProbability,
                                parentNode);
                    }
                }
            }

            putCachedArcs(successors);
            return successors;
        }


        /**
         * Tells whether the wrapped HMM state is emitting.
         *
         * @return the result of the HMM state's isEmitting()
         */
        @Override
        public boolean isEmitting() {
            return hmmState.isEmitting();
        }


        /**
         * Describes this state as the superclass description followed by
         * " hmm:" and the HMM state.
         *
         * @return the description string
         */
        @Override
        public String toString() {
            String description = super.toString();
            return description + " hmm:" + hmmState;
        }


        /**
         * Reports the fixed order value of this kind of state.
         *
         * @return always 5
         */
        @Override
        public int getOrder() {
            return 5;
        }


        /**
         * Scores the given data by delegating to the wrapped HMM state.
         *
         * @param data the data to score
         * @return the score reported by the HMM state
         */
        public float getScore(Data data) {
            return hmmState.getScore(data);
        }


        /**
         * Obtains the component scores for the given feature by delegating to
         * the wrapped HMM state's calculateComponentScore.
         *
         * @param feature the feature to score
         * @return the component scores reported by the HMM state
         */
        public float[] getComponentScore(Data feature) {
            return hmmState.calculateComponentScore(feature);
        }

    }

    /**
     * A LexTreeHMMState variant created for non-emitting HMM states. It
     * differs from its parent only in the order value it reports.
     */
    public class LexTreeNonEmittingHMMState extends LexTreeHMMState {

        /**
         * Builds a non-emitting HMM state. The language probability handed to
         * the parent constructor is always logOne; the given probability is
         * passed on as the insertion probability.
         *
         * @param hmmNode      the HMM node
         * @param wordSequence the word history
         * @param smearTerm    the smear term
         * @param smearProb    the smear probability
         * @param hmmState     the HMM state this search state stands for
         * @param probability  the log probability of the transition occurring
         * @param parentNode   the parent node
         */
        LexTreeNonEmittingHMMState(HMMNode hmmNode, WordSequence wordSequence,
                                   float smearTerm, float smearProb, HMMState hmmState,
                                   float probability, Node parentNode) {
            super(hmmNode,
                    wordSequence,
                    smearTerm,
                    smearProb,
                    hmmState,
                    logOne,
                    probability,
                    parentNode);
        }


        /**
         * Reports the fixed order value of this kind of state.
         *
         * @return always 0
         */
        @Override
        public int getOrder() {
            final int order = 0;
            return order;
        }
    }

    /**
     * A search-space state for a word node, together with the HMM node that
     * preceded it.
     */
    public class LexTreeWordState extends LexTreeState
            implements
            WordSearchState {

        private HMMNode lastNode;
        private float logLanguageProbability;


        /**
         * Builds a word state.
         *
         * @param wordNode            the word node, handed to the superclass
         * @param lastNode            the HMM node preceding the word node
         * @param wordSequence        the word history, handed to the superclass
         * @param smearTerm           the smear term, handed to the superclass
         * @param smearProb           the smear probability, handed to the superclass
         * @param languageProbability the log language probability of this word
         */
        LexTreeWordState(WordNode wordNode, HMMNode lastNode,
                         WordSequence wordSequence, float smearTerm, float smearProb,
                         float languageProbability) {
            super(wordNode, wordSequence, smearTerm, smearProb);
            this.logLanguageProbability = languageProbability;
            this.lastNode = lastNode;
        }


        /**
         * Looks up the pronunciation carried by this state's word node.
         *
         * @return the pronunciation for this word
         */
        public Pronunciation getPronunciation() {
            WordNode wordNode = (WordNode) getNode();
            return wordNode.getPronunciation();
        }


        /**
         * Tells whether this state's word node is final.
         *
         * @return <code>true</code> if the word node reports itself as final
         */
        @Override
        public boolean isFinal() {
            WordNode wordNode = (WordNode) getNode();
            return wordNode.isFinal();
        }


        /**
         * Combines the superclass hash with the hash of the preceding HMM
         * node.
         *
         * @return the hash code
         */
        @Override
        public int hashCode() {
            int base = super.hashCode();
            return base * 41 + lastNode.hashCode();
        }


        /**
         * Builds the signature of this state: the superclass signature
         * followed by "-ln-" and the hash code of the preceding HMM node.
         * Building signatures is slow, so this is meant for tasks that are
         * not time-critical, such as plotting states.
         *
         * @return the signature
         */
        @Override
        public String getSignature() {
            StringBuilder signature = new StringBuilder();
            signature.append(super.getSignature());
            signature.append("-ln-");
            signature.append(lastNode.hashCode());
            return signature.toString();
        }


        /**
         * Compares this state with another object. Two word states are equal
         * when they reference the same preceding HMM node (identity
         * comparison) and the superclass also considers them equal.
         *
         * @param o the object to compare against
         * @return <code>true</code> if the object is equal to this state
         */
        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (!(o instanceof LexTreeWordState)) {
                return false;
            }
            LexTreeWordState that = (LexTreeWordState) o;
            return lastNode == that.lastNode && super.equals(o);
        }


        /**
         * Reports the language probability of entering this state.
         *
         * @return the log language probability
         */
        @Override
        public float getLanguageProbability() {
            return logLanguageProbability;
        }


        /**
         * Collects the successors of this state, consulting the arc cache
         * first and storing a freshly computed result back into it.
         *
         * A state whose word is the sentence end word has no successors
         * (EMPTY_ARC). Any other state links to every entry point found for
         * its last unit paired with each right context of the preceding HMM
         * node, plus one final arc to the sentence end word node.
         *
         * @return the successor arcs
         */
        @Override
        public SearchStateArc[] getSuccessors() {
            SearchStateArc[] cached = getCachedArcs();
            if (cached != null) {
                return cached;
            }

            SearchStateArc[] successors = EMPTY_ARC;
            WordNode wordNode = (WordNode) getNode();

            if (wordNode.getWord() != sentenceEndWord) {
                Unit[] rightContexts = lastNode.getRC();
                Unit leftContext = wordNode.getLastUnit();

                // Gather the entry points for every right context.
                List<Node> entryPoints = new ArrayList<Node>();
                for (Unit rightContext : rightContexts) {
                    for (Node entry : hmmTree.getEntryPoint(leftContext, rightContext)) {
                        entryPoints.add(entry);
                    }
                }

                // One arc per entry point, then the arc to the </s> node
                // in the last slot.
                int entryCount = entryPoints.size();
                successors = new SearchStateArc[entryCount + 1];
                for (int i = 0; i < entryCount; i++) {
                    successors[i] = createUnitStateArc((HMMNode) entryPoints.get(i), this);
                }
                successors[entryCount] = createWordStateArc(
                        hmmTree.getSentenceEndWordNode(), lastNode, this);
            }

            putCachedArcs(successors);
            return successors;
        }


        /**
         * Reports the fixed order value of this kind of state.
         *
         * @return always 1
         */
        @Override
        public int getOrder() {
            return 1;
        }


        /**
         * Tells whether this state marks the start of a word (as opposed to
         * the end of a word). This implementation always answers
         * <code>false</code>.
         *
         * @return <code>false</code>
         */
        public boolean isWordStart() {
            return false;
        }
    }

    /**
     * The word state used for the final end-of-utterance word. It differs
     * from LexTreeWordState only in the order value it reports.
     */
    public class LexTreeEndWordState extends LexTreeWordState
            implements
            WordSearchState {

        /**
         * Builds an end word state; every argument is handed unchanged to
         * the LexTreeWordState constructor.
         *
         * @param wordNode       the word node
         * @param lastNode       the HMM node preceding the word node
         * @param wordSequence   the word history
         * @param smearTerm      the smear term
         * @param smearProb      the smear probability
         * @param logProbability the log probability of this word occurring
         */
        LexTreeEndWordState(WordNode wordNode, HMMNode lastNode,
                            WordSequence wordSequence, float smearTerm, float smearProb,
                            float logProbability) {
            super(wordNode,
                    lastNode,
                    wordSequence,
                    smearTerm,
                    smearProb,
                    logProbability);
        }


        /**
         * Reports the fixed order value of this kind of state.
         *
         * @return always 2
         */
        @Override
        public int getOrder() {
            final int order = 2;
            return order;
        }

    }


    /**
     * Chooses the insertion probability that applies to a unit node, based
     * on the node's type.
     *
     * A simple unit gets the unit insertion probability; a word-beginning
     * unit gets the sum of the unit and word insertion probabilities; a
     * silence unit gets the silence insertion probability; any other type is
     * treated as a filler.
     *
     * @param unitNode the unit lex node
     * @return the insertion probability for the node
     */
    private float calculateInsertionProbability(UnitNode unitNode) {
        final int unitType = unitNode.getType();

        if (unitType == UnitNode.SIMPLE_UNIT) {
            return logUnitInsertionProbability;
        }
        if (unitType == UnitNode.WORD_BEGINNING_UNIT) {
            return logUnitInsertionProbability + logWordInsertionProbability;
        }
        if (unitType == UnitNode.SILENCE_UNIT) {
            return logSilenceInsertionProbability;
        }
        // Every remaining type is handled as a filler.
        return logFillerInsertionProbability;
    }


    /**
     * Computes the unigram smear for a node. When unigram smearing is
     * enabled the result is the node's unigram probability multiplied by the
     * unigram smear weight; otherwise it is logOne.
     *
     * @param node the node to read the unigram probability from
     * @return the unigram smear
     */
    private float getUnigramSmear(Node node) {
        if (!wantUnigramSmear) {
            return logOne;
        }
        return node.getUnigramProbability() * unigramSmearWeight;
    }


    /**
     * Asks the language model for the smear term of a word sequence.
     *
     * @param ws the word sequence
     * @return the smear term reported by the language model
     */
    private float getSmearTermFromLanguageModel(WordSequence ws) {
        float smearTerm = languageModel.getSmear(ws);
        return smearTerm;
    }


    /**
     * Asks the HMM tree for the HMM nodes that belong to an end node.
     *
     * @param endNode the end node
     * @return the HMM nodes reported by the HMM tree for that end node
     */
    private HMMNode[] getHMMNodes(EndNode endNode) {
        HMMNode[] hmmNodes = hmmTree.getHMMNodes(endNode);
        return hmmNodes;
    }

}