/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 */

package edu.cmu.sphinx.linguist.lextree;

import edu.cmu.sphinx.linguist.WordSequence;
import edu.cmu.sphinx.linguist.acoustic.HMM;
import edu.cmu.sphinx.linguist.acoustic.HMMPool;
import edu.cmu.sphinx.linguist.acoustic.HMMPosition;
import edu.cmu.sphinx.linguist.acoustic.Unit;
import edu.cmu.sphinx.linguist.dictionary.Dictionary;
import edu.cmu.sphinx.linguist.dictionary.Pronunciation;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.linguist.language.ngram.LanguageModel;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.Utilities;

import java.util.*;
import java.util.logging.Logger;

/** Represents a node in the HMM tree. */
// For large vocabularies we may create millions of these objects,
// therefore they are extremely space sensitive, and we want to make
// them as small as possible. The requirements for these objects while
// building the tree are very different from the requirements once the
// tree is built. When building, we need to easily add successor nodes
// and quickly identify duplicate children. After the tree is built we
// just need to quickly identify successors. We want the flexibility of
// a map to manage successors at startup, but we don't want the space
// penalty (at least five 32-bit fields per map); we'd rather have an
// array. To support this dual mode, we manage the successors in an
// Object which is either a Map or an array, depending upon whether the
// node has been frozen.
class Node {

    private static int nodeCount;
    private static int successorCount;

    /**
     * Either a Map during tree construction or a Node[] after the tree
     * has been frozen. Converting to an array saves memory.
     */
    private Object successors;
    private float logUnigramProbability;

    /**
     * Creates a node.
     *
     * @param probability the unigram probability for the node
     */
    Node(float probability) {
        logUnigramProbability = probability;
        nodeCount++;
    }

    /**
     * Returns the unigram probability.
     *
     * @return the unigram probability
     */
    public float getUnigramProbability() {
        return logUnigramProbability;
    }

    /**
     * Sets the unigram probability.
     *
     * @param probability the unigram probability
     */
    public void setUnigramProbability(float probability) {
        logUnigramProbability = probability;
    }

    /**
     * Gets the successor node for the given key.
     *
     * @param key the object key
     * @return the successor node for the key, or null if there is none
     */
    private Node getSuccessor(Object key) {
        Map<Object, Node> successors = getSuccessorMap();
        return successors.get(key);
    }

    /**
     * Adds the child to the set of successors.
     *
     * @param key the object key
     * @param child the child to add
     */
    void putSuccessor(Object key, Node child) {
        Map<Object, Node> successors = getSuccessorMap();
        successors.put(key, child);
    }

    /**
     * Gets the successor map for this node.
     *
     * @return the successor map
     */
    @SuppressWarnings({"unchecked"})
    public Map<Object, Node> getSuccessorMap() {
        if (successors == null) {
            successors = new HashMap<Object, Node>(4);
        }
        assert successors instanceof Map;
        return (Map<Object, Node>) successors;
    }
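
    // Editor's illustrative sketch (not part of the original API): the life
    // cycle of the dual-mode successor storage described above -- a HashMap
    // while the tree is being built, a plain Node[] after freeze(). The key
    // and probabilities are arbitrary.
    static void successorLifeCycleSketch() {
        Node parent = new Node(LogMath.LOG_ONE);
        Node child = new Node(LogMath.LOG_ONE);
        parent.putSuccessor("key", child);  // backed by a HashMap while building
        parent.freeze();                    // converts the map into a Node[]
        assert parent.getSuccessors().length == 1; // cheap array access afterwards
    }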

    /** Freezes the node: converts the successor map into an array. */
    void freeze() {
        if (successors instanceof Map<?, ?>) {
            Map<Object, Node> map = getSuccessorMap();
            successors = map.values().toArray(new Node[map.size()]);
            for (Node node : map.values()) {
                node.freeze();
            }
            successorCount += map.size();
        }
    }

    static void dumpNodeInfo() {
        System.out.println("Nodes: " + nodeCount + " successors " +
                successorCount + " avg " + (successorCount / nodeCount));
    }

    /**
     * Adds a child node holding an HMM to the successors. If a matching child has already been added, the previously
     * added node is reused, and its probability is raised to the given probability if that is higher.
     *
     * @param hmm the HMM to add
     * @param probability the unigram probability for the node
     * @return the node that holds the HMM (new or old)
     */
    Node addSuccessor(HMM hmm, float probability) {
        Node child = null;
        Node matchingChild = getSuccessor(hmm);
        if (matchingChild == null) {
            child = new HMMNode(hmm, probability);
            putSuccessor(hmm, child);
        } else {
            if (matchingChild.getUnigramProbability() < probability) {
                matchingChild.setUnigramProbability(probability);
            }
            child = matchingChild;
        }
        return child;
    }

    /**
     * Adds a child node holding a pronunciation to the successors. If a matching child has already been added, the
     * previously added node is reused, and its probability is raised to the given probability if that is higher.
     *
     * @param pronunciation the pronunciation to add
     * @param probability the unigram probability for the node
     * @param wordNodeMap a map from pronunciations to word nodes, used to share word nodes across the tree
     * @return the node that holds the pronunciation (new or old)
     */
    WordNode addSuccessor(Pronunciation pronunciation, float probability,
                          Map<Pronunciation, WordNode> wordNodeMap) {
        WordNode child = null;
        WordNode matchingChild = (WordNode) getSuccessor(pronunciation);
        if (matchingChild == null) {
            child = wordNodeMap.get(pronunciation);
            if (child == null) {
                child = new WordNode(pronunciation, probability);
                wordNodeMap.put(pronunciation, child);
            }
            putSuccessor(pronunciation, child);
        } else {
            if (matchingChild.getUnigramProbability() < probability) {
                matchingChild.setUnigramProbability(probability);
            }
            child = matchingChild;
        }
        return child;
    }

    /** Adds the given word node to the successors, keyed by the node itself. */
    void addSuccessor(WordNode wordNode) {
        putSuccessor(wordNode, wordNode);
    }

    /**
     * Adds an EndNode to the set of successors for this node. If a matching child has already been added, the
     * previously added node is reused, and its probability is raised to the given probability if that is higher.
     *
     * @param child the EndNode to add
     * @param probability the probability for this transition
     * @return the node that holds the EndNode (new or old)
     */
    EndNode addSuccessor(EndNode child, float probability) {
        Unit baseUnit = child.getBaseUnit();
        EndNode matchingChild = (EndNode) getSuccessor(baseUnit);
        if (matchingChild == null) {
            putSuccessor(baseUnit, child);
        } else {
            if (matchingChild.getUnigramProbability() < probability) {
                matchingChild.setUnigramProbability(probability);
            }
            child = matchingChild;
        }
        return child;
    }
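
    // Editor's illustrative sketch (not part of the original API): the common
    // shape shared by the addSuccessor variants above -- look up a duplicate
    // child by key, create a child only if none exists, otherwise merge by
    // keeping the higher log unigram probability.
    static Node addOrMergeSketch(Node parent, Object key, float logProbability) {
        Node match = parent.getSuccessor(key);
        if (match == null) {
            match = new Node(logProbability);
            parent.putSuccessor(key, match);
        } else if (match.getUnigramProbability() < logProbability) {
            match.setUnigramProbability(logProbability);
        }
        return match;
    }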

    /**
     * Adds a child node to the successors. If a matching child has already been added, the previously added node is
     * reused.
     *
     * @param child the child to add
     * @return the node (may differ from child if a node holding the same key was already attached)
     */
    UnitNode addSuccessor(UnitNode child) {
        UnitNode matchingChild = (UnitNode) getSuccessor(child.getKey());
        if (matchingChild == null) {
            putSuccessor(child.getKey(), child);
        } else {
            child = matchingChild;
        }
        return child;
    }

    /**
     * Returns the successors for this node.
     *
     * @return the set of successor nodes
     */
    Node[] getSuccessors() {
        if (successors instanceof Map<?, ?>) {
            freeze();
        }
        return (Node[]) successors;
    }

    /**
     * Returns the string representation of this object.
     *
     * @return the string representation of the object
     */
    @Override
    public String toString() {
        return "Node ";
    }
}

/** A node representing a word in the HMM tree. */
class WordNode extends Node {

    private final Pronunciation pronunciation;
    private final boolean isFinal;

    /**
     * Creates a word node.
     *
     * @param pronunciation the pronunciation to wrap in this node
     * @param probability the word unigram probability
     */
    WordNode(Pronunciation pronunciation, float probability) {
        super(probability);
        this.pronunciation = pronunciation;
        this.isFinal = pronunciation.getWord().isSentenceEndWord();
    }

    /**
     * Gets the word associated with this node.
     *
     * @return the word
     */
    Word getWord() {
        return pronunciation.getWord();
    }

    /**
     * Gets the pronunciation associated with this node.
     *
     * @return the pronunciation
     */
    Pronunciation getPronunciation() {
        return pronunciation;
    }

    /**
     * Gets the last unit of this word.
     *
     * @return the last unit
     */
    Unit getLastUnit() {
        Unit[] units = pronunciation.getUnits();
        return units[units.length - 1];
    }

    /**
     * Returns the successors for this node. Word nodes are leaves, so this operation is not supported.
     *
     * @return never returns normally; always throws
     */
    @Override
    Node[] getSuccessors() {
        throw new Error("Not supported");
    }

    /**
     * Returns a string representation of this object.
     *
     * @return a string representation
     */
    @Override
    public String toString() {
        return "WordNode " + pronunciation + " p " + getUnigramProbability();
    }

    public boolean isFinal() {
        return isFinal;
    }
}
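
// Editor's illustrative sketch (not part of the original file): WordNodes are
// leaves, and calling getSuccessors() on one throws an Error, so traversals
// check the node type before descending -- the same guard HMMTree.dumpTree
// uses further below.
class LeafSafeTraversalSketch {

    static void visit(Node node, Set<Node> seen) {
        if (!seen.add(node)) {
            return;                   // shared nodes are visited only once
        }
        if (node instanceof WordNode) {
            return;                   // leaf: never ask a WordNode for successors
        }
        for (Node next : node.getSuccessors()) {
            visit(next, seen);
        }
    }
}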

/**
 * A class that represents the initial word in the search space. It is treated specially because we need to keep track
 * of the context as well. The context is embodied in the parent node.
 */
class InitialWordNode extends WordNode {

    final HMMNode parent;

    /**
     * Creates an InitialWordNode.
     *
     * @param pronunciation the pronunciation
     * @param parent the parent node
     */
    InitialWordNode(Pronunciation pronunciation, HMMNode parent) {
        super(pronunciation, LogMath.LOG_ONE);
        this.parent = parent;
    }

    /**
     * Gets the parent for this word node.
     *
     * @return the parent
     */
    HMMNode getParent() {
        return parent;
    }
}

abstract class UnitNode extends Node {

    public static final int SIMPLE_UNIT = 1;
    public static final int WORD_BEGINNING_UNIT = 2;
    public static final int SILENCE_UNIT = 3;
    public static final int FILLER_UNIT = 4;

    private int type;

    /**
     * Creates the UnitNode.
     *
     * @param probability the probability for the node
     */
    UnitNode(float probability) {
        super(probability);
    }

    /**
     * Returns the base unit for this node.
     *
     * @return the base unit
     */
    abstract Unit getBaseUnit();

    abstract Object getKey();

    abstract HMMPosition getPosition();

    /**
     * Gets the unit type (one of SIMPLE_UNIT, WORD_BEGINNING_UNIT, SILENCE_UNIT or FILLER_UNIT).
     *
     * @return the unit type
     */
    int getType() {
        return type;
    }

    /**
     * Sets the unit type.
     *
     * @param type the unit type
     */
    void setType(int type) {
        this.type = type;
    }
}

/** A node that represents an HMM in the HMM tree. */
class HMMNode extends UnitNode {

    private final HMM hmm;

    // There can potentially be a large number of nodes (millions),
    // therefore it is important to conserve space as much as possible.
    // While building the HMMNodes, we keep right contexts in a set to
    // allow easy pruning of duplicates. Once the tree is entirely
    // built, we no longer need to manage the right contexts as a set; a
    // simple array will do. The freeze method converts the set to the
    // array of units. This rcSet object holds the set during
    // construction and the array after the freeze.
    private Object rcSet;

    /**
     * Creates the node, wrapping the given HMM.
     *
     * @param hmm the HMM to hold
     * @param probability the probability for the node
     */
    HMMNode(HMM hmm, float probability) {
        super(probability);
        this.hmm = hmm;

        Unit base = getBaseUnit();

        int type = SIMPLE_UNIT;
        if (base.isSilence()) {
            type = SILENCE_UNIT;
        } else if (base.isFiller()) {
            type = FILLER_UNIT;
        } else if (hmm.getPosition().isWordBeginning()) {
            type = WORD_BEGINNING_UNIT;
        }
        setType(type);
    }

    /**
     * Returns the base unit for this HMM node.
     *
     * @return the base unit
     */
    @Override
    Unit getBaseUnit() {
        return hmm.getBaseUnit();
    }

    /**
     * Returns the HMM for this node.
     *
     * @return the HMM
     */
    HMM getHMM() {
        return hmm;
    }

    @Override
    HMMPosition getPosition() {
        return hmm.getPosition();
    }

    @Override
    HMM getKey() {
        return getHMM();
    }

    /**
     * Returns a string representation of this object.
     *
     * @return a string representation
     */
    @Override
    public String toString() {
        return "HMMNode " + hmm + " p " + getUnigramProbability();
    }

    /**
     * Adds a right context to the set of possible right contexts for this node. This is typically only needed for
     * HMMs at the ends of words.
     *
     * @param rc the right context
     */
    void addRC(Unit rc) {
        getRCSet().add(rc);
    }

    /** Freezes this node: converts the right-context set into an array to reduce memory overhead. */
    @Override
    @SuppressWarnings({"unchecked"})
    void freeze() {
        super.freeze();
        if (rcSet instanceof Set) {
            Set<Unit> set = (Set<Unit>) rcSet;
            rcSet = set.toArray(new Unit[set.size()]);
        }
    }
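
    // Editor's illustrative sketch (not part of the original API): the right
    // contexts follow the same build-then-freeze pattern as Node's successors.
    // The units passed in are assumed to come from an existing unit inventory.
    static Unit[] rcLifeCycleSketch(HMMNode node, Unit rc1, Unit rc2) {
        node.addRC(rc1);     // backed by a HashSet while building
        node.addRC(rc2);
        return node.getRC(); // freezes on first read, then returns the Unit[]
    }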

    /**
     * Gets the right-context set. It is an error to call this after the node has been frozen.
     *
     * @return the set of right contexts
     */
    @SuppressWarnings({"unchecked"})
    private Set<Unit> getRCSet() {
        if (rcSet == null) {
            rcSet = new HashSet<Unit>();
        }
        assert rcSet instanceof HashSet;
        return (Set<Unit>) rcSet;
    }

    /**
     * Returns the set of right contexts for this node.
     *
     * @return the set of right contexts
     */
    Unit[] getRC() {
        if (rcSet instanceof HashSet<?>) {
            freeze();
        }
        return (Unit[]) rcSet;
    }
}

class EndNode extends UnitNode {

    final Unit baseUnit;
    final Unit leftContext;
    final Integer key;

    /**
     * Creates the node.
     *
     * @param baseUnit the base unit for this node
     * @param lc the left context
     * @param probability the probability for the transition to this node
     */
    EndNode(Unit baseUnit, Unit lc, float probability) {
        super(probability);
        this.baseUnit = baseUnit;
        this.leftContext = lc;
        key = baseUnit.getBaseID() * 121 + leftContext.getBaseID();
    }

    /**
     * Returns the base unit for this node.
     *
     * @return the base unit
     */
    @Override
    Unit getBaseUnit() {
        return baseUnit;
    }

    /**
     * Returns the left context for this node.
     *
     * @return the left context
     */
    Unit getLeftContext() {
        return leftContext;
    }

    @Override
    Integer getKey() {
        return key;
    }

    @Override
    HMMPosition getPosition() {
        return HMMPosition.END;
    }

    /**
     * Returns a string representation of this object.
     *
     * @return a string representation
     */
    @Override
    public String toString() {
        return "EndNode base:" + baseUnit + " lc " + leftContext + ' ' + key;
    }

    /** Freezes this node. An EndNode keeps no extra build-time state, so this just defers to the superclass. */
    @Override
    void freeze() {
        super.freeze();
    }
}
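
// Editor's illustrative sketch (not part of the original file): EndNode packs
// its (base unit, left context) pair into a single Integer key as
// baseID * 121 + lcID. The packing is collision-free as long as every
// left-context base ID is below 121, which holds for typical phone sets of a
// few dozen units.
class EndNodeKeySketch {

    static int pack(int baseUnitId, int leftContextId) {
        assert 0 <= leftContextId && leftContextId < 121
                : "collision-free packing assumes base IDs below 121";
        return baseUnitId * 121 + leftContextId;
    }
}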

/**
 * Represents the vocabulary as a lex tree, with nodes in the tree representing either words (WordNode) or units
 * (HMMNode). HMMNodes may be shared.
 */
class HMMTree {

    private final HMMPool hmmPool;
    private InitialWordNode initialNode;
    private Dictionary dictionary;

    private LanguageModel lm;
    private final boolean addFillerWords;
    private final boolean addSilenceWord = true;
    private final Set<Unit> entryPoints = new HashSet<Unit>();
    private Set<Unit> exitPoints = new HashSet<Unit>();
    private Set<Word> allWords;
    private EntryPointTable entryPointTable;
    private boolean debug;
    private final float languageWeight;
    private final Map<Object, HMMNode[]> endNodeMap;
    private final Map<Pronunciation, WordNode> wordNodeMap;
    private WordNode sentenceEndWordNode;
    private Logger logger;

    /**
     * Creates the HMMTree.
     *
     * @param pool the pool of HMMs and units
     * @param dictionary the dictionary containing the pronunciations
     * @param lm the source of the set of words to add to the lex tree
     * @param addFillerWords if <code>true</code>, filler words are added
     * @param languageWeight the language weight
     */
    HMMTree(HMMPool pool, Dictionary dictionary, LanguageModel lm,
            boolean addFillerWords, float languageWeight) {
        this.hmmPool = pool;
        this.dictionary = dictionary;
        this.lm = lm;
        this.endNodeMap = new HashMap<Object, HMMNode[]>();
        this.wordNodeMap = new HashMap<Pronunciation, WordNode>();
        this.addFillerWords = addFillerWords;
        this.languageWeight = languageWeight;

        logger = Logger.getLogger(HMMTree.class.getSimpleName());
        compile();
    }

    /**
     * Given a base unit and a left context, returns the set of entry points into the lex tree.
     *
     * @param lc the left context
     * @param base the center unit
     * @return the set of entry points
     */
    public Node[] getEntryPoint(Unit lc, Unit base) {
        EntryPoint ep = entryPointTable.getEntryPoint(base);
        return ep.getEntryPointsFromLeftContext(lc).getSuccessors();
    }

    /**
     * Gets the set of HMM nodes associated with the given end node.
     *
     * @param endNode the end node
     * @return an array of associated HMM nodes
     */
    public HMMNode[] getHMMNodes(EndNode endNode) {
        HMMNode[] results = endNodeMap.get(endNode.getKey());
        if (results == null) {
            Map<HMM, HMMNode> resultMap = new HashMap<HMM, HMMNode>();
            Unit baseUnit = endNode.getBaseUnit();
            Unit lc = endNode.getLeftContext();
            for (Unit rc : entryPoints) {
                HMM hmm = hmmPool.getHMM(baseUnit, lc, rc, HMMPosition.END);
                HMMNode hmmNode = resultMap.get(hmm);
                if (hmmNode == null) {
                    hmmNode = new HMMNode(hmm, LogMath.LOG_ONE);
                    resultMap.put(hmm, hmmNode);
                }
                hmmNode.addRC(rc);
                for (Node node : endNode.getSuccessors()) {
                    WordNode wordNode = (WordNode) node;
                    hmmNode.addSuccessor(wordNode);
                }
            }

            // cache the result
            results = resultMap.values().toArray(new HMMNode[resultMap.size()]);
            endNodeMap.put(endNode.getKey(), results);
        }

        return results;
    }
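
    // Editor's illustrative sketch (not part of the original API): getHMMNodes
    // above is a cache-on-first-use lookup. A repeated call with an end node
    // carrying the same key returns the identical cached array rather than
    // recomputing it.
    void hmmNodeCacheSketch(EndNode endNode) {
        HMMNode[] first = getHMMNodes(endNode);
        HMMNode[] second = getHMMNodes(endNode);
        assert first == second; // served from endNodeMap on the second call
    }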

    /**
     * Returns the word node associated with the sentence end word.
     *
     * @return the sentence end word node
     */
    public WordNode getSentenceEndWordNode() {
        assert sentenceEndWordNode != null;
        return sentenceEndWordNode;
    }

    /** Compiles the vocabulary into an HMM tree. */
    private void compile() {
        collectEntryAndExitUnits();
        entryPointTable = new EntryPointTable(entryPoints);
        addWords();
        entryPointTable.createEntryPointMaps();
        freeze();
    }

    /** Dumps the tree. */
    void dumpTree() {
        System.out.println("Dumping Tree ...");
        Map<Node, Node> dupNode = new HashMap<Node, Node>();
        dumpTree(0, getInitialNode(), dupNode);
        System.out.println("... done Dumping Tree");
    }

    /**
     * Dumps the tree.
     *
     * @param level the level of the dump
     * @param node the root of the tree to dump
     * @param dupNode map of visited nodes
     */
    private void dumpTree(int level, Node node, Map<Node, Node> dupNode) {
        if (dupNode.get(node) == null) {
            dupNode.put(node, node);
            System.out.println(Utilities.pad(level) + node);
            if (!(node instanceof WordNode)) {
                for (Node nextNode : node.getSuccessors()) {
                    dumpTree(level + 1, nextNode, dupNode);
                }
            }
        }
    }

    /** Collects all of the entry and exit points for the vocabulary. */
    private void collectEntryAndExitUnits() {
        Collection<Word> words = getAllWords();
        for (Word word : words) {
            for (Pronunciation p : word.getPronunciations()) {
                Unit first = p.getUnits()[0];
                Unit last = p.getUnits()[p.getUnits().length - 1];
                entryPoints.add(first);
                exitPoints.add(last);
            }
        }

        if (debug) {
            System.out.println("Entry Points: " + entryPoints.size());
            System.out.println("Exit Points: " + exitPoints.size());
        }
    }

    /**
     * Called after the lex tree is built. Frees all temporary structures. After this is called, no more words can be
     * added to the lex tree.
     */
    private void freeze() {
        entryPointTable.freeze();
        dictionary = null;
        lm = null;
        exitPoints = null;
        allWords = null;
        wordNodeMap.clear();
        endNodeMap.clear();
    }

    /** Adds all words in the vocabulary to the lex tree. */
    private void addWords() {
        Set<Word> words = getAllWords();
        for (Word word : words) {
            addWord(word);
        }
    }

    /**
     * Adds a single word to the lex tree.
     *
     * @param word the word to add
     */
    private void addWord(Word word) {
        float prob = getWordUnigramProbability(word);
        Pronunciation[] pronunciations = word.getPronunciations();
        for (Pronunciation pronunciation : pronunciations) {
            addPronunciation(pronunciation, prob);
        }
    }

    /**
     * Adds the given pronunciation to the lex tree.
     *
     * @param pronunciation the pronunciation
     * @param probability the unigram probability
     */
    private void addPronunciation(Pronunciation pronunciation, float probability) {
        Unit baseUnit;
        Unit lc;
        Unit rc;
        Node curNode;
        WordNode wordNode;

        Unit[] units = pronunciation.getUnits();
        baseUnit = units[0];
        EntryPoint ep = entryPointTable.getEntryPoint(baseUnit);

        ep.addProbability(probability);

        if (units.length > 1) {
            curNode = ep.getNode();
            lc = baseUnit;
            for (int i = 1; i < units.length - 1; i++) {
                baseUnit = units[i];
                rc = units[i + 1];
                HMM hmm = hmmPool.getHMM(baseUnit, lc, rc, HMMPosition.INTERNAL);
                if (hmm == null) {
                    logger.severe("Missing HMM for unit " + baseUnit.getName()
                            + " with lc=" + lc.getName() + " rc=" + rc.getName());
                } else {
                    curNode = curNode.addSuccessor(hmm, probability);
                }
                lc = baseUnit;          // next lc is this base unit
            }

            // now add the last unit as an end unit
            baseUnit = units[units.length - 1];
            EndNode endNode = new EndNode(baseUnit, lc, probability);
            curNode = curNode.addSuccessor(endNode, probability);
            wordNode = curNode.addSuccessor(pronunciation, probability, wordNodeMap);
            if (wordNode.getWord().isSentenceEndWord()) {
                sentenceEndWordNode = wordNode;
            }
        } else {
            ep.addSingleUnitWord(pronunciation);
        }
    }

    /**
     * Gets the unigram probability for the given word.
     *
     * @param word the word
     * @return the unigram probability for the word
     */
    private float getWordUnigramProbability(Word word) {
        float prob = LogMath.LOG_ONE;
        if (!word.isFiller()) {
            Word[] wordArray = new Word[1];
            wordArray[0] = word;
            prob = lm.getProbability(new WordSequence(wordArray));
            // scale the log probability by the language weight
            prob *= languageWeight;
        }
        return prob;
    }

    /**
     * Returns the entire set of words, including filler words.
     *
     * @return the set of all words (as Word objects)
     */
    private Set<Word> getAllWords() {
        if (allWords == null) {
            allWords = new HashSet<Word>();
            for (String spelling : lm.getVocabulary()) {
                Word word = dictionary.getWord(spelling);
                if (word != null) {
                    allWords.add(word);
                }
            }

            if (addFillerWords) {
                allWords.addAll(Arrays.asList(dictionary.getFillerWords()));
            } else if (addSilenceWord) {
                allWords.add(dictionary.getSilenceWord());
            }
        }
        return allWords;
    }

    /**
     * Returns the initial node for this lex tree.
     *
     * @return the initial lex node
     */
    InitialWordNode getInitialNode() {
        return initialNode;
    }
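
    // Editor's illustrative usage sketch (hypothetical setup; the pool,
    // dictionary and language model are assumed to be configured elsewhere):
    // the tree is compiled in the constructor, after which lookups run against
    // the frozen structure.
    static void usageSketch(HMMPool pool, Dictionary dictionary, LanguageModel lm) {
        HMMTree tree = new HMMTree(pool, dictionary, lm, false, 1.0f);
        InitialWordNode start = tree.getInitialNode(); // the search starts here
        WordNode end = tree.getSentenceEndWordNode();  // and finishes here
        System.out.println("search space: " + start + " ... " + end);
    }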

    /** The EntryPointTable manages the set of entry points into the lex tree. */
    class EntryPointTable {

        private final Map<Unit, EntryPoint> entryPoints;

        /**
         * Creates the entry point table, given the set of all possible entry point units.
         *
         * @param entryPointCollection the set of possible entry points
         */
        EntryPointTable(Collection<Unit> entryPointCollection) {
            entryPoints = new HashMap<Unit, EntryPoint>();
            for (Unit unit : entryPointCollection) {
                entryPoints.put(unit, new EntryPoint(unit));
            }
        }

        /**
         * Given a CI unit, returns the EntryPoint object that manages the entry point for the unit.
         *
         * @param baseUnit the unit of interest (a CI unit)
         * @return the object that manages the entry point for the unit
         */
        EntryPoint getEntryPoint(Unit baseUnit) {
            return entryPoints.get(baseUnit);
        }

        /** Creates the entry point maps for all entry points. */
        void createEntryPointMaps() {
            for (EntryPoint ep : entryPoints.values()) {
                ep.createEntryPointMap();
            }
        }

        /** Freezes the entry point table. */
        void freeze() {
            for (EntryPoint ep : entryPoints.values()) {
                ep.freeze();
            }
        }

        /** Dumps the entry point table. */
        void dump() {
            for (EntryPoint ep : entryPoints.values()) {
                ep.dump();
            }
        }
    }

    /** Manages a single entry point. */
    class EntryPoint {

        final Unit baseUnit;
        final Node baseNode;        // second units and beyond start here
        final Map<Unit, Node> unitToEntryPointMap;
        List<Pronunciation> singleUnitWords;
        int nodeCount;
        Set<Unit> rcSet;
        float totalProbability;

        /**
         * Creates an entry point for the given unit.
         *
         * @param baseUnit the EntryPoint is created for this unit
         */
        EntryPoint(Unit baseUnit) {
            this.baseUnit = baseUnit;
            this.baseNode = new Node(LogMath.LOG_ZERO);
            this.unitToEntryPointMap = new HashMap<Unit, Node>();
            this.singleUnitWords = new ArrayList<Pronunciation>();
            this.totalProbability = LogMath.LOG_ZERO;
        }

        /**
         * Given a left context, gets the node that represents the set of entry points into this unit.
         *
         * @param leftContext the left context of interest
         * @return the node representing the entry point
         */
        Node getEntryPointsFromLeftContext(Unit leftContext) {
            return unitToEntryPointMap.get(leftContext);
        }

        /**
         * Records the probability for this entry point, keeping the maximum seen so far.
         *
         * @param probability a new probability
         */
        void addProbability(float probability) {
            if (probability > totalProbability) {
                totalProbability = probability;
            }
        }

        /**
         * Returns the probability for all words reachable from this node.
         *
         * @return the log probability
         */
        float getProbability() {
            return totalProbability;
        }
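
        // Editor's illustrative sketch (not part of the original API): despite
        // its name, addProbability keeps a running maximum rather than a sum,
        // so an entry point carries the best log unigram score of any word
        // that begins with its unit. The values here are arbitrary.
        void probabilityMergeSketch() {
            addProbability(-1.0f);
            addProbability(-3.0f);              // lower score, ignored
            assert getProbability() == -1.0f;   // the maximum wins
        }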

        /** Once we have built the full entry point we can eliminate some fields. */
        void freeze() {
            for (Node node : unitToEntryPointMap.values()) {
                node.freeze();
            }
            singleUnitWords = null;
            rcSet = null;
        }

        /**
         * Gets the base node for this entry point.
         *
         * @return the base node
         */
        Node getNode() {
            return baseNode;
        }

        /**
         * Adds a one-unit word to this entry point. Such single-unit words need to be dealt with specially.
         *
         * @param p the pronunciation of the single-unit word
         */
        void addSingleUnitWord(Pronunciation p) {
            singleUnitWords.add(p);
        }

        /**
         * Gets the set of possible right contexts that we can transition to from this entry point.
         *
         * @return the set of possible transition points
         */
        private Collection<Unit> getEntryPointRC() {
            if (rcSet == null) {
                rcSet = new HashSet<Unit>();
                for (Node node : baseNode.getSuccessorMap().values()) {
                    UnitNode unitNode = (UnitNode) node;
                    rcSet.add(unitNode.getBaseUnit());
                }
            }
            return rcSet;
        }

        /** Creates the entry point map, sharing common HMMs across all entry points. */
        void createEntryPointMap() {
            HashMap<HMM, Node> map = new HashMap<HMM, Node>();
            HashMap<HMM, HMMNode> singleUnitMap = new HashMap<HMM, HMMNode>();

            for (Unit lc : exitPoints) {
                Node epNode = new Node(LogMath.LOG_ZERO);
                for (Unit rc : getEntryPointRC()) {
                    HMM hmm = hmmPool.getHMM(baseUnit, lc, rc, HMMPosition.BEGIN);
                    Node addedNode;

                    if ((addedNode = map.get(hmm)) == null) {
                        addedNode = epNode.addSuccessor(hmm, getProbability());
                        map.put(hmm, addedNode);
                    } else {
                        epNode.putSuccessor(hmm, addedNode);
                    }

                    nodeCount++;
                    connectEntryPointNode(addedNode, rc);
                }

                connectSingleUnitWords(lc, epNode, singleUnitMap);
                unitToEntryPointMap.put(lc, epNode);
            }
        }

        /**
         * Connects the single-unit words associated with this entry point. The singleUnitWords list contains all
         * single-unit pronunciations that have, as their sole unit, the unit associated with this entry point. Entry
         * points for these words are added to the epNode for all possible left (exit) and right (entry) contexts.
         *
         * @param lc the left context
         * @param epNode the entry point node
         * @param map a cache of single-unit-word HMM nodes, shared across left contexts
         */
        private void connectSingleUnitWords(Unit lc, Node epNode, HashMap<HMM, HMMNode> map) {
            if (!singleUnitWords.isEmpty()) {
                for (Unit rc : entryPoints) {
                    HMM hmm = hmmPool.getHMM(baseUnit, lc, rc, HMMPosition.SINGLE);

                    HMMNode tailNode;
                    if ((tailNode = map.get(hmm)) == null) {
                        tailNode = (HMMNode) epNode.addSuccessor(hmm, getProbability());
                        map.put(hmm, tailNode);
                    } else {
                        epNode.putSuccessor(hmm, tailNode);
                    }

                    WordNode wordNode;
                    tailNode.addRC(rc);
                    nodeCount++;

                    for (Pronunciation p : singleUnitWords) {
                        if (p.getWord() == dictionary.getSentenceStartWord()) {
                            initialNode = new InitialWordNode(p, tailNode);
                        } else {
                            float prob = getWordUnigramProbability(p.getWord());
                            wordNode = tailNode.addSuccessor(p, prob, wordNodeMap);
                            if (p.getWord() == dictionary.getSentenceEndWord()) {
                                sentenceEndWordNode = wordNode;
                            }
                        }
                        nodeCount++;
                    }
                }
            }
        }

        /**
         * Connects the entry points that match the given rc to the given epNode.
         *
         * @param epNode add matching successors here
         * @param rc the next unit
         */
        private void connectEntryPointNode(Node epNode, Unit rc) {
            for (Node node : baseNode.getSuccessors()) {
                UnitNode successor = (UnitNode) node;
                if (successor.getBaseUnit() == rc) {
                    epNode.addSuccessor(successor);
                }
            }
        }

        /** Dumps the entry point. */
        void dump() {
            System.out.println("EntryPoint " + baseUnit + " RC Followers: "
                    + getEntryPointRC().size());
            int count = 0;
            Collection<Unit> rcs = getEntryPointRC();
            System.out.print("    ");
            for (Unit rc : rcs) {
                System.out.print(Utilities.pad(rc.getName(), 4));
                if (count++ >= 12) {
                    count = 0;
                    System.out.println();
                    System.out.print("    ");
                }
            }
            System.out.println();
        }
    }
}
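
// Editor's illustrative sketch (not part of the original file): how a decoder
// asks the tree for entry points when crossing a word boundary. The last unit
// of the previous word becomes the left context for the first HMMs of the
// next word.
class EntryPointLookupSketch {

    static Node[] lookup(HMMTree tree, Unit lastUnitOfPreviousWord,
                         Unit firstUnitOfNextWord) {
        return tree.getEntryPoint(lastUnitOfPreviousWord, firstUnitOfNextWord);
    }
}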